source: trunk/nagios-velvice/velvice.cgi @ 321

Last change on this file since 321 was 321, checked in by g7moreau, 6 years ago
  • Better constant on top
  • Property svn:keywords set to Id
File size: 18.5 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.8.10');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# Read the HTTP request once and keep only the three scalars needed later,
# then drop the CGI object.
my $query           = CGI->new();
# scalar() forces param() to return a single value instead of a list, and
# the trailing // '' guarantees a defined string when no "check" parameter
# was sent, so the later comparisons on $cgi_check never see undef.
my $cgi_check       = uri_decode(scalar $query->param('check')) // '';
my $cgi_script_name = $query->script_name();
# Directory part of the script URL (script name with the final "/xxx.cgi" removed).
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r;
undef $query;
31
# Known alert levels: 'id' ranks the severity (higher is worse) and 'color'
# is the base background color before any fading is applied.
my %STATUS_DB = (
   'CRITICAL' => { 'id' => 3, 'color' => '#F88888' },
   'WARNING'  => { 'id' => 2, 'color' => '#FFFF00' },
   'PENDING'  => { 'id' => 1, 'color' => '#E0E0E0' },
   );
37
# Load the optional YAML configuration, then fill in a default for every
# key the rest of the script reads.
my $config_file = '/etc/nagios3/velvice.yml';
my $config      = {};
$config = YAML::Syck::LoadFile($config_file) if -e $config_file;
# An empty file (or a top-level value that is not a mapping) must not break
# the hash accesses below: fall back on an empty configuration.
$config = {} if ref $config ne 'HASH';

# Paths and URLs: an empty or null value falls back on the default.
$config->{'nagios-server'}                ||= {};
$config->{'nagios-server'}{'status-file'} ||= '/var/cache/nagios3/status.dat';
$config->{'nagios-server'}{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$config->{'nagios-server'}{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$config->{'nagios-server'}{'status-cgi'}  ||= "$cgi_path/status.cgi";
$config->{'nagios-server'}{'stylesheets'} ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/stylesheets}r;
$config->{'nagios-server'}{'image'}       ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/image}r;
$config->{'host-mapping'}                 ||= {};

# Color fading: 'day-min' and 'factor' use //= so that an explicit 0 in the
# configuration is honored; 'day-max' keeps ||= because it is used as a
# divisor in alertcolor() and must never be 0.
$config->{'color-downtime'}               ||= {};
$config->{'color-downtime'}{'day-min'}    //=  3;
$config->{'color-downtime'}{'day-max'}    ||= 50;
$config->{'color-downtime'}{'factor'}     //=  0.7;
$config->{'remote-action'}                ||= {};
53
# Parse the Nagios status file selected by the configuration.
my %status_log_args = (
   Filename => $config->{'nagios-server'}{'status-file'},
   Version  => 3.0,
   );
my $log = Nagios::StatusLog->new(%status_log_args);
58
# Translate a host name through the 'host-mapping' table of the
# configuration; a name without an entry is returned unchanged.
sub hostmapping {
   my ($name) = @_;

   my $mapping = $config->{'host-mapping'};
   return $mapping->{$name} if exists $mapping->{$name};
   return $name;
   }
64
# Return the age of a state change, in days, formatted with one decimal.
#   $time_change - epoch timestamp (seconds) of the last state change
# The value is displayed as "N days" and compared with the 'day-min' /
# 'day-max' settings, so the divisor must be the number of seconds in a
# day (24 * 3600), not 60 * 3600.
sub downtime {
   my ($time_change) = @_;

   my $seconds_per_day = 24 * 3600;
   return sprintf '%.1f', (time - $time_change) / $seconds_per_day;
   }
71
# Compute the background color of an alert.
#   $status   - status name; names missing from %STATUS_DB get '#0000FF'
#   $downtime - age of the alert as returned by downtime()
# The base color is kept for the first 'day-min' days, then lightened in
# proportion to the age, capped at 'day-max' and scaled by 'factor'.
sub alertcolor {
   my ($status, $downtime) = @_;

   my $fade = $config->{'color-downtime'};
   my $base = exists $STATUS_DB{$status} ? $STATUS_DB{$status}->{'color'} : '#0000FF';

   my $age = $downtime - $fade->{'day-min'};           # same color first days
   $age = $fade->{'day-max'} if $age > $fade->{'day-max'}; # cap the fading
   $age = 0                  if $age < 0;

   my $lightness = ($age * $fade->{'factor'}) / $fade->{'day-max'};
   return Color::Calc::color_light_html($base, $lightness);
   }
85
# Replace every single whitespace character of $str with the HTML entity
# &nbsp; so a browser never wraps the text.
sub nosbreak {
   my ($str) = @_;

   # A limit of -1 keeps trailing empty fields, so leading, trailing and
   # repeated whitespace each yield one entity.
   return join '&nbsp;', split /\s/, $str, -1;
   }
91
# Walk every host of the status log and sort what is wrong into:
#   %hostdown        - hosts reported DOWN whose services are all failing
#   @serviceproblems - every service that is not OK on the other hosts
#   %hostcount       - per host: number of problems plus the color, status
#                      id and downtime used for the host cells
#   @futurecheck     - filled later with the delayed recheck commands
my %hostdown;
my @serviceproblems;
my %hostcount;
my @futurecheck;
HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);

   if ($host_stat->status eq 'DOWN') {
      # A DOWN host with at least one OK or PENDING service is handled
      # like any other host; otherwise it is listed once as a dead host.
      my $has_live_service = 0;
      for my $srv ($log->list_services_on_host($host)) {
         my $srv_status = $log->service($host, $srv)->status;
         if ($srv_status eq 'OK' or $srv_status eq 'PENDING') {
            $has_live_service = 1;
            last;
            }
         }

      if (not $has_live_service) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   SRV:
   for my $srv ($log->list_services_on_host($host)) {
      my $srv_stat = $log->service($host, $srv);
      my $status   = $srv_stat->status;

      next SRV if $status eq 'OK';

      push @serviceproblems, $srv_stat;

      my $downtime  = downtime($srv_stat->last_state_change);
      my $color     = alertcolor($status, $downtime);
      my $status_id = exists $STATUS_DB{$status} ? $STATUS_DB{$status}->{'id'} : 0;

      # The first problem seen on a host seeds its summary.
      my $summary = ($hostcount{$host} ||= {count => 0, color => $color, status_id => $status_id, downtime => $downtime});
      $summary->{'count'}++;

      # A problem at least as severe and more recent takes over the host color.
      if (($status_id >= $summary->{'status_id'}) and ($downtime < $summary->{'downtime'})) {
         @{$summary}{qw(downtime status_id color)} = ($downtime, $status_id, $color);
         }
      }
   }
133
# Current local time; it labels the refresh link in the header, and $now
# is reused further down when recheck commands are scheduled.
my $now = time;
my ($min, $hour, $mday, $mon, $year) = (localtime $now)[1 .. 5];
my $date = nosbreak(sprintf '%04i-%02i-%02i %02i:%02i', $year + 1900, $mon + 1, $mday, $hour, $min);

# HTTP header, blank separator line, then the static top of the page.
my $htmlpage = <<"PAGE_HEAD";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet"    type="text/css"  href="$config->{'nagios-server'}{'stylesheets'}/velvice.css">
 <link rel="shortcut icon" type="image/ico" href="$config->{'nagios-server'}{'image'}/favicon.ico">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$config->{'nagios-server'}{'portal-url'}">Core Server</a></li>
    <li><small><a id="refresh" href="$cgi_script_name">$date</a></small></li>
  </ul>
 </h1>
</div>
PAGE_HEAD
161
# Count the problems per service description and per status; both tables
# feed the recheck buttons at the top of the page.
my (%service_name, %service_status);
for my $problem (@serviceproblems) {
   my $label = $problem->service_description;
   my $level = $problem->status;
   $service_name{$label}   += 1;
   $service_status{$level} += 1;
   }
168
# ---------------------------------------------------------------------
# Page body: recheck buttons, service problem table, host down table and
# remote action hints, all appended to $htmlpage.
#
# Rechecks requested through ?check= are written to the Nagios command
# file immediately and queued a second time in @futurecheck.
# ---------------------------------------------------------------------

# Requested recheck, always a defined string.
my $check_request = $cgi_check // '';

# Escape only the HTML-significant characters, so any byte that is not
# one of them (UTF-8 sequences included) is passed through untouched.
my $escape_html = sub {
   my ($text) = @_;
   return HTML::Entities::encode($text // '', q{<>&"'});
   };

# Build one recheck button: target of ?check=, visible label, counter.
my $button = sub {
   my ($target, $label, $count) = @_;
   return " <span class='button'><a href='$cgi_script_name?check="
      . $escape_html->(lc(uri_encode($target)))
      . "'>$label</a><small>$count</small></span>";
   };

# Collected while the service table is built, used by the remote action
# section at the end.
my %remote_sshdown = ();
my %remote_db      = ();
my $remote_flag;

if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {

   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      $button->('all', 'ALL', scalar(@serviceproblems)),
      map($button->($_, $escape_html->($_), $service_status{$_}),
         sort keys %service_status);
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n",
      map($button->($_, nosbreak($escape_html->($_)), $service_name{$_}),
         sort keys %service_name);
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   # The command file is opened only when a recheck is really requested,
   # so simply viewing the panel needs no write access to it.
   my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   my $nagios_cmd;

   my $wanted = lc $check_request;

   my $current_host  = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # Encode the plugin output, then drop its leading "STATUS:" style prefix.
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      my $hostname_html = $escape_html->($hostname);
      my $service_html  = $escape_html->($service);
      my $status_html   = $escape_html->($status);

      my $color = alertcolor($status, $downtime);
      my $stylecolor = "style='background:$color;'";
      $htmlpage .= " <tr>\n";
      if ($hostname ne $current_host) {
         # First row of a host: the two host cells span all its problems.
         $current_host  = $hostname;
         my $rowspan    = $hostcount{$hostname}->{'count'};
         my $rowcolor   = "style='background:" . $hostcount{$hostname}->{'color'} . ";'";
         my $host_uri   = $escape_html->(uri_encode($hostname));
         $htmlpage .= "  <td $rowcolor rowspan='$rowspan'>"
            . "<a href=\"$cgi_script_name?check=$host_uri\">&#8623;</a></td>\n";
         $htmlpage .= "  <td $rowcolor class='hoop' rowspan='$rowspan'>"
            . "<a href=\"$config->{'nagios-server'}{'status-cgi'}?host=$host_uri\">$hostname_html</a></td>\n";
         }

      # Service name in bold when a remote action with style "bold" matches.
      my $bold = 0;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $action    = $config->{'remote-action'}{$act_name};
         my $act_regex = $action->{'regex'};
         # An undefined or empty pattern must never be used as a regex.
         next ACTION_STYLE if not defined $act_regex or $act_regex eq '';
         $bold++ if $service =~ m/$act_regex/ and ($action->{'style'} // '') eq 'bold';
         }
      $htmlpage .= $bold ? "  <td $stylecolor class='hoop bold'>" : "  <td $stylecolor class='hoop'>";
      $htmlpage .= "$service_html</td>\n";

      $htmlpage .= "  <td $stylecolor class='hoop'>$status_html</td>\n";
      $htmlpage .= "  <td $stylecolor class='comment'>$output</td>\n";
      $htmlpage .= "  <td $stylecolor class='days'>$downtime days</td>\n";

      # Exact (case-insensitive) comparisons: a service called "Firewall"
      # must not be taken for the "all" keyword, and a service name is
      # compared as text, never interpolated into a regex.
      if (($wanted eq 'all')
            or ($wanted eq lc($service))
            or ($wanted eq 'critical' and $status eq 'CRITICAL')
            or ($wanted eq 'warning'  and $status eq 'WARNING')
            or ($wanted eq 'pending'  and $status eq 'PENDING')
            or ($check_request eq $hostname and $status =~ m/^(CRITICAL|WARNING|PENDING)$/)
            ) {
         $now++; # one more second per scheduled check
         my $interval = $srv->next_check() - $srv->last_check() || 300; # 5 * 60 = 300
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20));

         $htmlpage .= "  <td class='checking'>" . ($future - $now) . "</td>\n";
         if (not defined $nagios_cmd) {
            open $nagios_cmd, '>>', $cmd_file or die "Can't open file $cmd_file: $!";
            }
         printf {$nagios_cmd} "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $action     = $config->{'remote-action'}{$act_name};
         my $act_regex  = $action->{'regex'};
         my $act_status = $action->{'status'} || 'ALL';
         my $act_depend = $action->{'depend'} || 'SSH';

         if (defined $act_regex and $act_regex ne ''
               and $service =~ m/$act_regex/
               and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # remember the hosts whose "depend" service is itself failing
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   if (defined $nagios_cmd) {
      close $nagios_cmd or warn "Can't close file $cmd_file: $!";
      }
   }

# host down - shown even when no service problem is left, otherwise a
# dead host would be hidden behind "No alert to recheck."
if (%hostdown) {
   $htmlpage .= "<br />\n";
   $htmlpage .= "<table border='1'>\n";
   HOST_DOWN:
   for my $host (sort keys %hostdown) {
      my $host_stat = $hostdown{$host};
      my $hostname = $host_stat->host_name;
      my $downtime = downtime($host_stat->last_state_change);
      my $color = alertcolor('CRITICAL', $downtime);
      $htmlpage .= " <tr style='background:$color'>\n";
      $htmlpage .= "  <td><a class='hoop' href=\"$config->{'nagios-server'}{'status-cgi'}?host="
         . $escape_html->(uri_encode($hostname)) . "\">" . $escape_html->($hostname) . "</a></td>\n";
      my @host_service;
      for my $srv ($log->list_services_on_host($host)) {
         push @host_service, $escape_html->($log->service($host, $srv)->service_description);
         }
      $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }
   $htmlpage .= "</table>\n";
   }

# remote action - the command is only displayed, never executed
if ($remote_flag) {
   require Nagios::Object::Config;
   my $parser = Nagios::Object::Config->new();
   $parser->parse("/var/cache/nagios3/objects.cache");

   $htmlpage .= "<div class='action'>\n";
   REMOTE_ACTION:
   for my $act_name (sort keys %remote_db) {
      my $action     = $config->{'remote-action'}{$act_name};
      my $act_depend = $action->{'depend'} || 'SSH';

      # Keep each host once, and only when its "depend" service is not failing.
      my %seen;
      my @targets = grep { !$seen{$_}++ and !exists $remote_sshdown{$act_depend}->{$_} } @{$remote_db{$act_name}};
      next REMOTE_ACTION if not @targets;

      my $remote_action = $action->{'command'};
      $remote_action = $action->{'command-one'}
         if @targets == 1 and exists $action->{'command-one'};
      # Nothing to display when the action has no command configured.
      next REMOTE_ACTION if not defined $remote_action;

      my $srv_title = $action->{'title'} || "Action: $act_name";
      $htmlpage .= "<h2>$srv_title</h2>\n";
      $htmlpage .= "<pre>\n";
      my @hosts;
      for my $host (@targets) {
         my $object  = $parser->find_object("$host", "Nagios::Host");
         # Fall back on the Nagios host name when the object cache does
         # not provide an address for this host.
         my $address = (defined $object ? $object->address : undef) // $host;
         push @hosts, hostmapping($address =~ s/\..*$//r);
         }
      my $hosts_list = join ' ', @hosts;
      $htmlpage .= ' ' . $escape_html->($remote_action =~ s{\%m}{$hosts_list}r);
      $htmlpage .= "</pre>\n";
      }
   $htmlpage .= "</div>\n";
   }
329
# Static page footer: project links, version and licence.
my $page_footer = <<"PAGE_FOOT";
<hr clear="all">
<div class="footer">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
PAGE_FOOT
$htmlpage .= $page_footer;

# The whole response (HTTP header included) is written in one go.
print $htmlpage;
347
# delayed future check
# Second batch of recheck commands, written to the Nagios command file two
# seconds after the page has been printed.
if (@futurecheck) {
   sleep 2;
   my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   # Report the real path on failure (the message used to say "filename").
   open my $nagios_cmd, '>>', $cmd_file or die "Can't open file $cmd_file: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   # Buffered write errors only show up when the handle is closed.
   close $nagios_cmd or die "Can't close file $cmd_file: $!";
   }
356
357__END__
358
359
360=head1 NAME
361
362velvice.cgi - nagios velvice alert panel
363
364=head1 USAGE
365
366 velvice.cgi
367 velvice.cgi?check=XXX
368
369
370=head1 DESCRIPTION
371
372=begin html
373
374<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />
375
376=end html
377
378Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
379
380The Nagios web page is sometimes very graphically charged
381and does not necessarily contain the information you need at a glance.
382For example, it is quite complicated to restart controls on multiple hosts in one click.
383
384For example, a server that is down should take only one line and not one per service...
385Similarly, a service that has been down for 5 minutes or since yesterday
386has more weight than a service that has fallen for 15 days.
387
388With Velvice Panel, a broken down server takes only one line.
389Services that have been falling for a long time gradually lose their color and become pastel colors.
390
391With Velvice Panel, it is possible through a single click
392to redo a check of all services that are in the CRITICAL state.
393Similarly, it is possible to restart a check on all SSH services in breakdowns ...
394In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
395
396There is also a link to the web page of the main Nagios server.
397For each computer, you have a direct link to its dedicated web page on this server.
398
399
400=head1 CONFIGURATION FILE SPECIFICATION
401
402The configuration file must be F</etc/nagios3/velvice.yml>.
403This is not a required file.
404The file is in YAML format because this is a human-readable text file style.
405Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
406
407You can find in the software nagios-velvice an example of configuration:
408L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
409This one is in fact the master reference specification!
410
411The main keys C<nagios-server> and C<color-downtime> have good default values.
412No secondary key is required...
The Velvice script tries hard to replace ~ with the right value automatically.
414
415 nagios-server:
416   status-file: /var/cache/nagios3/status.dat
417   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
418   portal-url:  ~/nagios3/
419   status-cgi:  ~/cgi-bin/nagios3/status.cgi
420   stylesheets: ~/nagios3/stylesheets
421
422The background color of the faulty service line display remains stable with a bright color for at least 3 days.
423Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
424
425 color-downtime:
426   day-min:  3
427   day-max: 50
428   factor:   0.7
429
430With key C<host-mapping>,
431it's good to map C<localhost> to the real name of the computer (hostname).
432
433 host-mapping:
434   localhost:  srv-nagios
435   toto:       titi
436
437The only important key is C<remote-action>.
438You can affiliate as many subkeys as you want.
439Let's take an example:
440
441 remote-action:
442   oom-killer:
443     regex: ^OOM Killer
444     title:  OOM Killer
445     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
446     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
447     depend: ^SSH
448     status: ALL
449     style: bold
450
451C<oom-killer> is just a key for your remote action.
452The regex is used to find which service has a problem...
The title is used in the result web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
454The C<command> is just written on this web page.
It is your responsibility to copy and paste it into a terminal.
456For security reasons, the nagios server does not have the right to launch the command on the remote host.
457The wildcard C<%m> is replaced by the list of the host (separated by the space).
Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
459If your command is based on SSH,
460you can have an SSH action only if the remote SSH is running.
461So you can make the remote action depend on the SSH service through a regular expression of your choice.
462
463The last two keys.
464The C<status> key is for CRITICAL or WARNING (or ALL).
465The key C<style> is there to mark in bold the service in error on the web page.
466
467=head1 SEE ALSO
468
469yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
470
471In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
472
473=over
474
475=item * C<yamllint> - Linter for YAML files (Python)
476
477=item * C<libyaml-shell-perl> - YAML test shell (Perl)
478
479=back
480
481
Own project resources:
483
484=over
485
486=item * L<Web Site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
487
488=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
489
490=item * L<SVN Repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
491
492=item * L<Debian Package|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/download/>
493
494=back
495
496
497=head1 VERSION
498
499$Id: velvice.cgi 321 2018-07-20 11:45:18Z g7moreau $
500
501
502=head1 AUTHOR
503
504Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
505
506
507=head1 LICENSE AND COPYRIGHT
508
509Licence GNU GPL version 2 or later and Perl equivalent
510
511Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.