source: trunk/nagios-velvice/velvice.cgi @ 319

Last change on this file since 319 was 319, checked in by g7moreau, 6 years ago
  • Master color if multiple alert on one host
  • Property svn:keywords set to Id
File size: 18.5 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.8.10');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# Read the request once: the optional "check" selector (ALL, a status level,
# a service description or a host name) and the script URL used to build links.
my $query = CGI->new();

# param() is read in scalar context on purpose: in list context a missing
# parameter yields an empty list (leaving the selector undefined) and a
# repeated one would leak its second value into uri_decode's options argument.
my $cgi_check_raw   = $query->param('check');
my $cgi_check       = defined $cgi_check_raw ? uri_decode($cgi_check_raw) : '';
$cgi_check          = '' if not defined $cgi_check;
my $cgi_script_name = $query->script_name();
# Directory part of the script URL (script name without the trailing "/xxx.cgi")
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r;
undef $query;
31
# Load the optional YAML configuration, then fill in every missing key with
# its default so the rest of the script can read $config without checks.
my $config_file = '/etc/nagios3/velvice.yml';
my $config      = {};
$config = YAML::Syck::LoadFile($config_file) if -e $config_file;
# An empty or non-mapping YAML document does not give a hash: fall back to defaults
$config = {} if ref $config ne 'HASH';

$config->{'nagios-server'}                ||= {};
$config->{'nagios-server'}{'status-file'} ||= '/var/cache/nagios3/status.dat';
$config->{'nagios-server'}{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$config->{'nagios-server'}{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$config->{'nagios-server'}{'status-cgi'}  ||= "$cgi_path/status.cgi";
$config->{'nagios-server'}{'stylesheets'} ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/stylesheets}r;
$config->{'nagios-server'}{'image'}       ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/image}r;
$config->{'host-mapping'}                 ||= {};
$config->{'color-downtime'}               ||= {};
# day-min and factor use //= so that an explicit 0 in the configuration is kept
$config->{'color-downtime'}{'day-min'}    //=  3;
# day-max is used as a divisor in alertcolor(): 0 must still fall back to the default
$config->{'color-downtime'}{'day-max'}    ||= 50;
$config->{'color-downtime'}{'factor'}     //=  0.7;
$config->{'remote-action'}                ||= {};

# Parsed Nagios status file: source of every host and service state shown below
my $log = Nagios::StatusLog->new(
   Filename => $config->{'nagios-server'}{'status-file'},
   Version  => 3.0
   );
52
# Translate a host name through the 'host-mapping' configuration table.
# Returns the mapped name when an entry exists, the name unchanged otherwise.
sub hostmapping {
   my ($name) = @_;

   my $mapping = $config->{'host-mapping'};
   return $mapping->{$name} if exists $mapping->{$name};
   return $name;
   }
58
# Age of a state change, in days, formatted with one decimal.
#   $time_change : epoch time (seconds) of the last state change
# Returns a string such as '2.5'.
sub downtime {
   my ($time_change) = @_;

   # The result is displayed as "days" and compared with day-min / day-max,
   # so the divisor must be the number of seconds in one day.
   my $seconds_per_day = 24 * 3600;
   return sprintf '%.1f', (time() - $time_change) / $seconds_per_day;
   }
65
# Lighten an alert color according to how long the alert has lasted.
#   $color    : base HTML color
#   $downtime : age of the alert, as returned by downtime()
# The age is shifted by 'day-min', capped at 'day-max', floored at 0, then
# scaled by 'factor' to obtain the lightening ratio given to Color::Calc.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $fade = $config->{'color-downtime'};

   my $age = $downtime - $fade->{'day-min'};               # same color during the first days
   $age = $fade->{'day-max'} if $age > $fade->{'day-max'}; # no further fading past day-max
   $age = 0                  if $age < 0;

   return Color::Calc::color_light_html($color, $age * $fade->{'factor'} / $fade->{'day-max'});
   }
76
# Replace every whitespace character of a string by an HTML non-breaking
# space entity, so that the text is never wrapped by the browser.
sub nosbreak {
   my ($str) = @_;

   # Limit -1 keeps trailing empty fields, so leading, trailing and
   # consecutive whitespace characters are each replaced.
   return join '&nbsp;', split /\s/, $str, -1;
   }
82
# Data collected from the status log and used to build the page:
my %hostdown;          # host name => host status object, for hosts fully down
my @serviceproblems;   # every service status object whose status is not OK
my %hostcount;         # host name => {count, color, status_id, downtime} summary
my @futurecheck;       # delayed external commands, written after the page is sent

# Rank of each alert status; any other status ranks 0
my %severity_of = (
   CRITICAL => 3,
   WARNING  => 2,
   PENDING  => 1,
   );

HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);

   # A DOWN host is reported on a single line, unless at least one of its
   # services is OK or PENDING: then its services are listed one by one.
   if ($host_stat->status eq 'DOWN') {
      my $responding = grep {
         my $state = $log->service($host, $_)->status;
         $state eq 'OK' || $state eq 'PENDING'
         } $log->list_services_on_host($host);

      if (not $responding) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   SRV:
   for my $srv ($log->list_services_on_host($host)) {
      my $service = $log->service($host, $srv);
      my $status  = $service->status;
      next SRV if $status eq 'OK';

      push @serviceproblems, $service;

      my $downtime  = downtime($service->last_state_change);
      my $color     = alertcolor($status eq 'CRITICAL' ? '#F88888' : '#FFFF00', $downtime);
      my $status_id = exists $severity_of{$status} ? $severity_of{$status} : 0;

      # The first alert seen on a host seeds its summary; a later alert takes
      # over the master color when it ranks at least as high and is more recent.
      my $summary = $hostcount{$host} ||= {count => 0, color => $color, status_id => $status_id, downtime => $downtime};
      $summary->{'count'}++;
      if ($status_id >= $summary->{'status_id'} and $downtime < $summary->{'downtime'}) {
         @{$summary}{qw(downtime status_id color)} = ($downtime, $status_id, $color);
         }
      }
   }
131
# Current time: shown in the page header, and used as the base timestamp of
# the forced checks scheduled further down.
my $now = time;
my ($min, $hour, $mday, $mon, $year) = (localtime $now)[1 .. 5];
my $date = nosbreak(sprintf '%04i-%02i-%02i %02i:%02i', $year + 1900, $mon + 1, $mday, $hour, $min);

# HTTP header and top of the HTML page; the date is a link reloading the panel
my $htmlpage = <<"ENDH";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet"    type="text/css"  href="$config->{'nagios-server'}{'stylesheets'}/velvice.css">
 <link rel="shortcut icon" type="image/ico" href="$config->{'nagios-server'}{'image'}/favicon.ico">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$config->{'nagios-server'}{'portal-url'}">Core Server</a></li>
    <li><small><a id="refresh" href="$cgi_script_name">$date</a></small></li>
  </ul>
 </h1>
</div>
ENDH
159
# Number of alerts per service description and per status level, shown next
# to each recheck button.
my (%service_name, %service_status);
foreach my $problem (@serviceproblems) {
   my ($label, $level) = ($problem->service_description, $problem->status);
   $service_name{$label}   += 1;
   $service_status{$level} += 1;
   }
166
# Body of the panel, in display order:
#   1. recheck buttons and the table of services in alert (forced checks are
#      scheduled here when the request carries a matching ?check= selector),
#   2. the table of hosts that are completely down,
#   3. the remote commands suggested for the configured 'remote-action' keys.

# Characters escaped when a host or service name is written as HTML text
my $html_unsafe = q{<>&"'};

# Filled while the service table is built, read by the remote action section
my %remote_sshdown = ();   # depend regex => { host => count } of hosts whose dependency service is in alert
my %remote_db      = ();   # action name  => [ hosts on which the action applies ]
my $remote_flag;           # true as soon as one remote action applies

if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {
   # The selector is undefined when the page is requested without ?check=
   my $check = defined $cgi_check ? $cgi_check : '';

   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      " <span class='button'><a href='$cgi_script_name?check=all'>ALL</a><small>" . scalar(@serviceproblems) . '</small></span>',
      map(" <span class='button'><a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>$_</a><small>$service_status{$_}</small></span>",
         sort keys %service_status);
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n",
      map(" <span class='button'><a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>"
            . nosbreak(HTML::Entities::encode($_, $html_unsafe)) . "</a><small>$service_name{$_}</small></span>",
         sort keys %service_name);
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $cmd_file or die "Can't open file $cmd_file: $!";

   my $current_host  = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # Plugin output without its leading "STATUS:" / "STATUS -" prefix
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      my $hostname_html = HTML::Entities::encode($hostname, $html_unsafe);
      my $service_html  = HTML::Entities::encode($service,  $html_unsafe);

      my $color = $status eq 'CRITICAL' ? '#F88888' : '#FFFF00';
      $color = alertcolor($color, $downtime);
      my $stylecolor = "style='background:$color;'";
      $htmlpage .= " <tr>\n";
      # The host cells are written once and span all the alert rows of the host
      if ($hostname ne $current_host) {
         $current_host  = $hostname;
         my $rowspan    = $hostcount{$hostname}->{'count'};
         my $rowcolor   = "style='background:" . $hostcount{$hostname}->{'color'} . ";'";
         $htmlpage .= "  <td $rowcolor rowspan='$rowspan'>"
            . "<a href=\"$cgi_script_name?check=" . uri_encode($hostname) . '">&#8623;</a></td>' . "\n";
         $htmlpage .= "  <td $rowcolor class='hoop' rowspan='$rowspan'>"
            . "<a href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">$hostname_html</a></td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $action    = $config->{'remote-action'}{$act_name};
         my $act_regex = $action->{'regex'};
         # An empty pattern would silently reuse the last successful regex
         next ACTION_STYLE if not defined $act_regex or $act_regex eq '';
         my $act_style = defined $action->{'style'} ? $action->{'style'} : '';
         $bold++ if $service =~ m/$act_regex/ and $act_style eq 'bold';
         }
      $htmlpage .= $bold ? "  <td $stylecolor class='hoop bold'>" : "  <td $stylecolor class='hoop'>";
      $htmlpage .= "$service_html</td>\n";

      $htmlpage .= "  <td $stylecolor class='hoop'>$status</td>\n";
      $htmlpage .= "  <td $stylecolor class='comment'>$output</td>\n";
      $htmlpage .= "  <td $stylecolor class='days'>$downtime days</td>\n";

      # The selector is compared as a whole (not as a substring, and never
      # interpolated as a regex): a service named "Firewall" must not trigger
      # the "all" recheck, and a name such as "Disk (/)" must still match.
      if (($check =~ m/^all$/i)
            or (lc($check) eq lc($service))
            or ($check =~ m/^critical$/i and $status eq 'CRITICAL')
            or ($check =~ m/^warning$/i  and $status eq 'WARNING')
            or ($check =~ m/^pending$/i  and $status eq 'PENDING')
            or ($check eq $hostname      and $status =~ m/^(CRITICAL|WARNING|PENDING)$/)
            ) {
         $now++; # each immediate check is scheduled one second after the previous one
         my $interval = $srv->next_check() - $srv->last_check() || 300; # 5 * 60 = 300
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20));

         $htmlpage .= "  <td class='checking'>" . ($future - $now) . "</td>\n";
         printf {$nagios_cmd} "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $action     = $config->{'remote-action'}{$act_name};
         my $act_regex  = $action->{'regex'};
         my $act_status = $action->{'status'} || 'ALL';
         my $act_depend = $action->{'depend'} || 'SSH';

         if (defined $act_regex and $act_regex ne ''
               and $service =~ m/$act_regex/
               and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # remember the hosts whose dependency service (SSH by default) is itself in alert
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   close $nagios_cmd or warn "Can't close file $cmd_file: $!";
   }

# host down - shown even when no service alert exists, otherwise a host that
# is completely down would be hidden behind "No alert to recheck."
if (%hostdown) {
   $htmlpage .= "<br />\n";
   $htmlpage .= "<table border='1'>\n";
   HOST_DOWN:
   for my $host (sort keys %hostdown) {
      my $host_stat = $hostdown{$host};
      my $hostname = $host_stat->host_name;
      my $downtime = downtime($host_stat->last_state_change);
      my $color = alertcolor('#F88888', $downtime);
      $htmlpage .= " <tr style='background:$color'>\n";
      $htmlpage .= "  <td><a class='hoop' href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">"
         . HTML::Entities::encode($hostname, $html_unsafe) . "</a></td>\n";
      my @host_service;
      for my $srv ($log->list_services_on_host($host)) {
         push @host_service, HTML::Entities::encode($log->service($host, $srv)->service_description, $html_unsafe);
         }
      $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }
   $htmlpage .= "</table>\n";
   }

# remote action - the commands are only displayed, never executed
if ($remote_flag) {
   require Nagios::Object::Config;
   my $parser = Nagios::Object::Config->new();
   $parser->parse("/var/cache/nagios3/objects.cache");

   $htmlpage .= "<div class='action'>\n";
   REMOTE_ACTION:
   for my $act_name (keys %remote_db) {
      my $action     = $config->{'remote-action'}{$act_name};
      my $act_depend = $action->{'depend'} || 'SSH';

      # Keep each host once, and only when its dependency service is not in alert
      my %seen;
      my @action = grep { !exists $remote_sshdown{$act_depend}->{$_} and !$seen{$_}++ } @{$remote_db{$act_name}};
      next REMOTE_ACTION if not @action;

      my $srv_title = $action->{'title'} || "Action: $act_name";
      $htmlpage .= "<h2>$srv_title</h2>\n";
      $htmlpage .= "<pre>\n";
      my $remote_action = $action->{'command'};
      $remote_action = $action->{'command-one'}
         if @action == 1 and exists $action->{'command-one'};
      $remote_action = '' if not defined $remote_action;

      my @hosts;
      for my $host (@action) {
         my $object  = $parser->find_object("$host", "Nagios::Host");
         my $address = defined $object ? $object->address : undef;
         # Host unknown in the object cache, or without address: use its Nagios name
         $address = $host if not defined $address or $address eq '';
         # Strip the domain of a DNS name, but keep a dotted IPv4 address whole
         my $short = $address =~ m/^\d{1,3}(?:\.\d{1,3}){3}$/ ? $address : $address =~ s/\..*$//r;
         push @hosts, hostmapping($short);
         }
      my $hosts_list = join ' ', @hosts;
      # The command is plain text: escape it so that '<', '>' and '&' are displayed as typed
      $htmlpage .= ' ' . HTML::Entities::encode($remote_action =~ s{\%m}{$hosts_list}r, '<>&');
      $htmlpage .= "</pre>\n";
      }
   $htmlpage .= "</div>\n";
   }
328
# Page footer (version, manual link, licence), then the whole page is sent
my $footer = <<"ENDH";
<hr clear="all">
<div class="footer">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
ENDH
$htmlpage .= $footer;

print $htmlpage;
346
# delayed future check: once the page has been printed, wait two seconds and
# write the second, randomly delayed round of forced checks prepared above
if (@futurecheck) {
   sleep 2;
   my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   # Name the real file in the error message (it used to print the word "filename")
   open my $nagios_cmd, '>>', $cmd_file or die "Can't open file $cmd_file: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   # Buffered write errors only show up when the handle is closed
   close $nagios_cmd or warn "Can't close file $cmd_file: $!";
   }
355
356__END__
357
358
359=head1 NAME
360
361velvice.cgi - nagios velvice alert panel
362
363=head1 USAGE
364
365 velvice.cgi
366 velvice.cgi?check=XXX
367
368
369=head1 DESCRIPTION
370
371=begin html
372
373<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />
374
375=end html
376
377Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
378
379The Nagios web page is sometimes very graphically charged
380and does not necessarily contain the information you need at a glance.
381For example, it is quite complicated to restart controls on multiple hosts in one click.
382
383For example, a server that is down should take only one line and not one per service...
384Similarly, a service that has been down for 5 minutes or since yesterday
has more weight than a service that has been down for 15 days.
386
387With Velvice Panel, a broken down server takes only one line.
Services that have been down for a long time gradually lose their color and become pastel colors.
389
390With Velvice Panel, it is possible through a single click
391to redo a check of all services that are in the CRITICAL state.
Similarly, it is possible to restart a check on all failing SSH services...
393In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
394
395There is also a link to the web page of the main Nagios server.
396For each computer, you have a direct link to its dedicated web page on this server.
397
398
399=head1 CONFIGURATION FILE SPECIFICATION
400
401The configuration file must be F</etc/nagios3/velvice.yml>.
402This is not a required file.
403The file is in YAML format because this is a human-readable text file style.
404Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
405
406You can find in the software nagios-velvice an example of configuration:
407L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
408This one is in fact the master reference specification!
409
410The main keys C<nagios-server> and C<color-downtime> have good default values.
411No secondary key is required...
The Velvice script tries hard to replace ~ with the right value automatically.
413
414 nagios-server:
415   status-file: /var/cache/nagios3/status.dat
416   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
417   portal-url:  ~/nagios3/
418   status-cgi:  ~/cgi-bin/nagios3/status.cgi
419   stylesheets: ~/nagios3/stylesheets
420
421The background color of the faulty service line display remains stable with a bright color for at least 3 days.
422Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
423
424 color-downtime:
425   day-min:  3
426   day-max: 50
427   factor:   0.7
428
429With key C<host-mapping>,
430it's good to map C<localhost> to the real name of the computer (hostname).
431
432 host-mapping:
433   localhost:  srv-nagios
434   toto:       titi
435
436The only important key is C<remote-action>.
437You can affiliate as many subkeys as you want.
438Let's take an example:
439
440 remote-action:
441   oom-killer:
442     regex: ^OOM Killer
443     title:  OOM Killer
444     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
445     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
446     depend: ^SSH
447     status: ALL
448     style: bold
449
450C<oom-killer> is just a key for your remote action.
451The regex is used to find which service has a problem...
The title is used in the result web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
453The C<command> is just written on this web page.
It is your responsibility to copy and paste it into a terminal.
455For security reasons, the nagios server does not have the right to launch the command on the remote host.
The wildcard C<%m> is replaced by the list of hosts (separated by spaces).
Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
458If your command is based on SSH,
459you can have an SSH action only if the remote SSH is running.
460So you can make the remote action depend on the SSH service through a regular expression of your choice.
461
462The last two keys.
463The C<status> key is for CRITICAL or WARNING (or ALL).
464The key C<style> is there to mark in bold the service in error on the web page.
465
466=head1 SEE ALSO
467
468yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
469
470In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
471
472=over
473
474=item * C<yamllint> - Linter for YAML files (Python)
475
476=item * C<libyaml-shell-perl> - YAML test shell (Perl)
477
478=back
479
480
Project resources:
482
483=over
484
485=item * L<Web Site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
486
487=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
488
489=item * L<SVN Repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
490
491=item * L<Debian Package|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/download/>
492
493=back
494
495
496=head1 VERSION
497
498$Id: velvice.cgi 319 2018-07-20 10:24:13Z g7moreau $
499
500
501=head1 AUTHOR
502
503Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
504
505
506=head1 LICENSE AND COPYRIGHT
507
508Licence GNU GPL version 2 or later and Perl equivalent
509
510Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.