source: trunk/nagios-velvice/velvice.cgi @ 282

Last change on this file since 282 was 282, checked in by g7moreau, 6 years ago
  • Add favicon support
  • Property svn:keywords set to Id
File size: 17.1 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.8.1');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# Read everything needed from the request, then release the CGI object.
my $query = CGI->new();

# Optional "check" selector (velvice.cgi?check=XXX).
# - scalar(): CGI->param() is context sensitive, ask for the single value.
# - The parameter is absent on a plain page view: fall back to the empty
#   string so the later comparisons on $cgi_check never see undef (which
#   would raise "uninitialized value" warnings for every listed service).
my $cgi_check_raw = scalar $query->param('check');
my $cgi_check     = defined $cgi_check_raw ? uri_decode($cgi_check_raw) : '';
$cgi_check        = '' if not defined $cgi_check;

my $cgi_script_name = $query->script_name();
# URL of the directory holding this script (script name minus "/xxx.cgi").
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r;
undef $query;
31
# Site configuration: optional YAML file, every key falls back to a default.
my $config_file = '/etc/nagios3/velvice.yml';
my $config      = {};
if (-e $config_file) {
   $config = YAML::Syck::LoadFile($config_file);
   }

# Nagios server section: file locations and URLs.
# The URL defaults are derived from the location of this script.
my $server_cfg = $config->{'nagios-server'} ||= {};
$server_cfg->{'status-file'} ||= '/var/cache/nagios3/status.dat';
$server_cfg->{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$server_cfg->{'portal-url'}  ||= ($cgi_path =~ s{/cgi-bin/}{/}r) . '/';
$server_cfg->{'status-cgi'}  ||= "$cgi_path/status.cgi";
$server_cfg->{'stylesheets'} ||= $server_cfg->{'portal-url'} =~ s{/?$}{/stylesheets}r;
$server_cfg->{'image'}       ||= $server_cfg->{'portal-url'} =~ s{/?$}{/image}r;

# Host name translation table and remote actions: empty unless configured.
$config->{'host-mapping'}  ||= {};
$config->{'remote-action'} ||= {};

# Row colour fading, see alertcolor().
my $color_cfg = $config->{'color-downtime'} ||= {};
$color_cfg->{'day-min'} ||=  3;
$color_cfg->{'day-max'} ||= 50;
$color_cfg->{'factor'}  ||=  0.7;

# Parsed view of the Nagios status file.
my $log = Nagios::StatusLog->new(
   Version  => 3.0,
   Filename => $server_cfg->{'status-file'},
   );
52
# Translate a host name through the 'host-mapping' configuration table.
# Returns the mapped name when the host has an entry, the name itself otherwise.
sub hostmapping {
   my ($name) = @_;

   my $mapping = $config->{'host-mapping'};
   return $name if not exists $mapping->{$name};
   return $mapping->{$name};
   }
58
# Age of a state, in days.
#
#   $time_change  epoch time (seconds) of the last state change
#
# Returns the elapsed time between $time_change and now, expressed in days
# and formatted with one decimal (for example '2.5').
# The result is displayed as "... days" and compared with the 'day-min' /
# 'day-max' settings, so the divisor must be the number of seconds in a day
# (24 * 3600), not 60 * 3600.
sub downtime {
   my ($time_change) = @_;

   my $seconds_per_day = 24 * 3600;
   return sprintf '%.1f', (time() - $time_change) / $seconds_per_day;
   }
65
# Lighten an alert colour according to the age of the alert.
#
#   $color     base HTML colour (for example '#F88888')
#   $downtime  age of the alert, as returned by downtime()
#
# The age is first reduced by 'day-min' (so the colour stays unchanged during
# the first days), then clamped to the range 0 .. 'day-max'.  The lightening
# factor passed to Color::Calc grows linearly from 0 up to 'factor'.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $fade    = $config->{'color-downtime'};
   my $day_max = $fade->{'day-max'};

   my $age = $downtime - $fade->{'day-min'};
   if ($age > $day_max) {
      $age = $day_max;
      }
   if ($age < 0) {
      $age = 0;
      }

   my $factor = ($age * $fade->{'factor'}) / $day_max;
   return Color::Calc::color_light_html($color, $factor);
   }
76
my %hostdown;          # host name => host status, for hosts reported on a single line
my @serviceproblems;   # every service whose status is not OK (hosts above excluded)
my %hostcount;         # host name => number of its entries in @serviceproblems
my @futurecheck;       # Nagios commands to submit after the page has been printed

# Walk through every host of the status file, in alphabetical order.
HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);
   my @services  = $log->list_services_on_host($host);

   # A DOWN host goes to %hostdown only when none of its services is OK or
   # PENDING; otherwise its services are listed one by one like any other host.
   if ($host_stat->status eq 'DOWN') {
      my $answering = 0;
      SERVICE:
      for my $srv (@services) {
         my $state = $log->service($host, $srv)->status;
         next SERVICE if $state ne 'OK' and $state ne 'PENDING';
         $answering = 1;
         last SERVICE;
         }

      if (not $answering) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   for my $srv (@services) {
      my $service = $log->service($host, $srv);
      next if $service->status eq 'OK';

      push @serviceproblems, $service;
      $hostcount{$host}++;
      }
   }
101
# Timestamp shown in the header (local time, minute resolution).
# $now is also the base time of the re-check commands queued further down.
my $now = time;
my ($min, $hour, $mday, $mon, $year) = (localtime $now)[1 .. 5];
my $date = sprintf '%04i-%02i-%02i %02i:%02i', $year + 1900, $mon + 1, $mday, $hour, $min;

# The page is accumulated in $htmlpage, HTTP header included, and printed
# in one go at the end of the script.
my $htmlpage = <<"ENDH";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet"    type="text/css"  href="$config->{'nagios-server'}{'stylesheets'}/velvice.css">
 <link rel="shortcut icon" type="image/ico" href="$config->{'nagios-server'}{'image'}/favicon.ico">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$config->{'nagios-server'}{'portal-url'}">Core Server</a></li>
    <li><small><a id="refresh" href="$cgi_script_name">$date</a></small></li>
  </ul>
 </h1>
</div>
ENDH
129
# Count the failing services per description and per status; the totals
# are shown next to the re-check links.
my (%service_name, %service_status);
for my $problem (@serviceproblems) {
   my $description = $problem->service_description;
   my $state       = $problem->status;

   $service_name{$description}++;
   $service_status{$state}++;
   }
136
# Body of the panel:
#  1. re-check links and one table row per failing service; the services
#     selected by the "check" parameter get a forced check queued in the
#     Nagios command file,
#  2. one row per host that is completely down,
#  3. the remote commands suggested by the 'remote-action' configuration.
#
# Sections 2 and 3 are built after the if/else: hosts that are completely
# down have no entry in @serviceproblems, so they must also be reported when
# the list of failing services is empty.

# Filled while walking the failing services, used by section 3.
my %remote_sshdown = ();   # depend regex => { host => hits }, hosts where a "depend" service fails
my %remote_db      = ();   # action name  => [ hosts ] on which the action applies
my $remote_flag;           # true once at least one remote action applies

if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {
   # No selector on a plain page view: use '' rather than undef so that the
   # comparisons below stay silent under "use warnings".
   my $check    = defined $cgi_check ? $cgi_check : '';
   my $check_lc = lc $check;

   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      " <a href='$cgi_script_name?check=all'>ALL</a><small>(" . scalar(@serviceproblems) . ')</small>',
      map(" <a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>$_</a>($service_status{$_})", sort keys %service_status);
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n",
      map(" <a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>$_</a><small>($service_name{$_})</small>", sort keys %service_name);
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   # Nagios external command file, opened in append mode.
   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $nagios_cmd_file
      or die "Can't open file $nagios_cmd_file: $!";

   my $current_host  = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # Plugin output without its leading "STATUS:" / "STATUS -" tag.
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      my $color = $status eq 'CRITICAL' ? '#F88888' : '#FFFF00';
      $color = alertcolor($color, $downtime);
      $htmlpage .= " <tr style='background:$color;'>\n";
      # The two host cells are emitted once per host and span all its rows.
      if ($hostname ne $current_host) {
         $current_host = $hostname;
         $htmlpage .= "  <td rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>"
            . "<a href=\"$cgi_script_name?check=" . uri_encode($hostname) . '">&#8623;</a></td>' . "\n";
         $htmlpage .= "  <td class='hoop' rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>"
            . "<a href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">$hostname</a></td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex = $config->{'remote-action'}{$act_name}{'regex'};
         # 'style' is optional: treat a missing key as "not bold".
         my $act_style = $config->{'remote-action'}{$act_name}{'style'} // '';
         $bold++ if $service =~ m/$act_regex/ and $act_style eq 'bold';
         }
      $htmlpage .= $bold ? '  <td class="hoop bold">' : '  <td class="hoop">';
      $htmlpage .= "$service</td>\n";

      $htmlpage .= "  <td class='hoop'>$status</td>\n";
      $htmlpage .= "  <td style='max-width:60%;'><small>$output";

      # Is this service selected by the "check" parameter?
      # The selector is compared as a whole, case-insensitively: a service
      # named "Firewall" must not be taken for the keyword "all", and a
      # description containing regex metacharacters such as "Disk (sda1)"
      # must match itself literally.
      if ($check_lc eq 'all'
            or $check_lc eq lc($service)
            or ($check_lc eq 'critical' and $status eq 'CRITICAL')
            or ($check_lc eq 'warning'  and $status eq 'WARNING')
            or ($check_lc eq 'pending'  and $status eq 'PENDING')
            or ($check eq $hostname     and $status =~ m/^(CRITICAL|WARNING|PENDING)$/)
            ) {
         # One second more per selected service: immediate checks are spread.
         $now++;
         my $interval = $srv->next_check() - $srv->last_check() || 300;
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20)); # 5 * 60 = 300

         $htmlpage .= " -- <b>CHECK</b> [$now/" . ($future - $now) . "]";
         printf {$nagios_cmd} "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex  = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_status = $config->{'remote-action'}{$act_name}{'status'} || 'ALL';
         my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

         if ($service =~ m/$act_regex/ and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # check depend service otherwise
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   close $nagios_cmd or warn "Can't close file $nagios_cmd_file: $!";
   }

# host down
if (%hostdown) {
   $htmlpage .= "<br />\n";
   $htmlpage .= "<table border='1'>\n";
   HOST_DOWN:
   for my $host (sort keys %hostdown) {
      my $host_stat = $hostdown{$host};
      my $hostname = $host_stat->host_name;
      my $downtime = downtime($host_stat->last_state_change);
      my $color = alertcolor('#F88888', $downtime);
      $htmlpage .= " <tr style='background:$color'>\n";
      $htmlpage .= "  <td><a class='hoop' href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">$hostname</a></td>\n";
      my @host_service;
      for my $srv ($log->list_services_on_host($host)) {
         push @host_service, $log->service($host, $srv)->service_description;
         }
      $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }
   $htmlpage .= "</table>\n";
   }

# remote action
if ($remote_flag) {
   require Nagios::Object::Config;
   my $parser = Nagios::Object::Config->new();
   $parser->parse("/var/cache/nagios3/objects.cache");

   $htmlpage .= "<div class='action'>\n";
   REMOTE_ACTION:
   for my $act_name (keys %remote_db) {
      my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

      # Keep only the hosts whose "depend" service is not itself failing.
      my @action = grep !exists $remote_sshdown{$act_depend}->{$_}, @{$remote_db{$act_name}};
      if (@action) {
         my $srv_title = $config->{'remote-action'}{$act_name}{'title'} || "Action: $act_name";
         $htmlpage .= "<h2>$srv_title</h2>\n";
         $htmlpage .= "<pre>\n";
         my $remote_action = $config->{'remote-action'}{$act_name}{'command'} // '';
         $remote_action = $config->{'remote-action'}{$act_name}{'command-one'}
            if @action == 1 and exists $config->{'remote-action'}{$act_name}{'command-one'};
         my @hosts;
         for my $host (@action) {
            my $object = $parser->find_object("$host", "Nagios::Host");
            # Host missing from the object cache: fall back to its Nagios
            # name instead of calling a method on an undefined value.
            my $address = defined $object ? $object->address : $host;
            push @hosts, hostmapping($address =~ s/\..*$//r);
            }
         my $hosts_list = join ' ', @hosts;
         $htmlpage .= ' ' . $remote_action =~ s{\%m}{$hosts_list}r;
         $htmlpage .= "</pre>\n";
         }
      }
   $htmlpage .= "</div>\n";
   }
293
# Page footer: project links, script version ($VERSION is interpolated),
# licence and copyright, then the closing body / html tags.
$htmlpage .= <<"ENDH";
<hr clear="all">
<div class="footer">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
ENDH

# Send the whole response (HTTP header and HTML page) at once.
print $htmlpage;
311
# delayed future check
# The commands collected in @futurecheck while the page was built are
# appended to the Nagios command file once the page has been printed,
# after a short pause.
if (@futurecheck) {
   sleep 2;
   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   # Name the real file in the error message (it used to print "filename").
   open my $nagios_cmd, '>>', $nagios_cmd_file
      or die "Can't open file $nagios_cmd_file: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   # Buffered write errors only show up at close time: report them.
   close $nagios_cmd or warn "Can't close file $nagios_cmd_file: $!";
   }
320
321__END__
322
323
324=head1 NAME
325
326velvice.cgi - nagios velvice alert panel
327
328=head1 USAGE
329
330 velvice.cgi
331 velvice.cgi?check=XXX
332
333
334=head1 DESCRIPTION
335
336=begin html
337
338<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />
339
340=end html
341
342Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
343
344The Nagios web page is sometimes very graphically charged
345and does not necessarily contain the information you need at a glance.
346For example, it is quite complicated to restart controls on multiple hosts in one click.
347
348For example, a server that is down should take only one line and not one per service...
349Similarly, a service that has been down for 5 minutes or since yesterday
350has more weight than a service that has fallen for 15 days.
351
352With Velvice Panel, a broken down server takes only one line.
353Services that have been failing for a long time gradually lose their color and fade to pastel.
354
355With Velvice Panel, it is possible through a single click
356to redo a check of all services that are in the CRITICAL state.
357Similarly, it is possible to restart a check on all SSH services in breakdowns ...
358In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
359
360There is also a link to the web page of the main Nagios server.
361For each computer, you have a direct link to its dedicated web page on this server.
362
363
364=head1 CONFIGURATION FILE SPECIFICATION
365
366The configuration file must be F</etc/nagios3/velvice.yml>.
367This is not a required file.
368The file is in YAML format because this is a human-readable text file style.
369Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
370
371You can find in the software nagios-velvice an example of configuration:
372L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
373This one is in fact the master reference specification!
374
375The main keys C<nagios-server> and C<color-downtime> have good default values.
376No secondary key is required...
377The Velvice script tries hard to replace ~ with the correct value automatically.
378
379 nagios-server:
380   status-file: /var/cache/nagios3/status.dat
381   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
382   portal-url:  ~/nagios3/
383   status-cgi:  ~/cgi-bin/nagios3/status.cgi
384   stylesheets: ~/nagios3/stylesheets
385
386The background color of the faulty service line display remains stable with a bright color for at least 3 days.
387Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
388
389 color-downtime:
390   day-min:  3
391   day-max: 50
392   factor:   0.7
393
394With key C<host-mapping>,
395it's good to map C<localhost> to the real name of the computer (hostname).
396
397 host-mapping:
398   localhost:  srv-nagios
399   toto:       titi
400
401The only important key is C<remote-action>.
402You can affiliate as many subkeys as you want.
403Let's take an example:
404
405 remote-action:
406   oom-killer:
407     regex: ^OOM Killer
408     title:  OOM Killer
409     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
410     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
411     depend: ^SSH
412     status: ALL
413     style: bold
414
415C<oom-killer> is just a key for your remote action.
416The regex is used to find which service has a problem...
417The title is used in the result web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
418The C<command> is just written on this web page.
419It is your responsibility to copy and paste it into a terminal.
420For security reasons, the nagios server does not have the right to launch the command on the remote host.
421The wildcard C<%m> is replaced by the list of hosts (separated by spaces).
422Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
423If your command is based on SSH,
424you can have an SSH action only if the remote SSH is running.
425So you can make the remote action depend on the SSH service through a regular expression of your choice.
426
427The last two keys.
428The C<status> key is for CRITICAL or WARNING (or ALL).
429The key C<style> is there to mark in bold the service in error on the web page.
430
431=head1 SEE ALSO
432
433yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
434
435In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
436
437=over
438
439=item * C<yamllint> - Linter for YAML files (Python)
440
441=item * C<libyaml-shell-perl> - YAML test shell (Perl)
442
443=back
444
445
446Own project resources:
447
448=over
449
450=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
451
452=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
453
454=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
455
456=back
457
458
459=head1 VERSION
460
461$Id: velvice.cgi 282 2018-07-15 13:55:09Z g7moreau $
462
463
464=head1 AUTHOR
465
466Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
467
468
469=head1 LICENSE AND COPYRIGHT
470
471Licence GNU GPL version 2 or later and Perl equivalent
472
473Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.