source: trunk/nagios-velvice/velvice.cgi @ 273

Last change on this file since 273 was 273, checked in by g7moreau, 6 years ago
  • Add online manual link
  • Property svn:keywords set to Id
File size: 16.2 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.7.8');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# Read the only CGI parameter ("check") and work out where this script lives.
# param() is forced into scalar context (CGI.pm warns about list context) and
# a missing parameter becomes the empty string so later comparisons do not
# trigger "uninitialized value" warnings.
my $query           = CGI->new();
my $cgi_check       = uri_decode(scalar($query->param('check')) // '') // '';
my $cgi_script_name = $query->script_name();
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r;
undef $query;

# Optional YAML configuration; every key below falls back to a default.
my $config_file = '/etc/nagios3/velvice.yml';
my $config = {};
$config = YAML::Syck::LoadFile($config_file) if -e $config_file;
if (ref $config ne 'HASH') {
   # An empty file loads as undef; anything else that is not a mapping is ignored.
   warn "velvice: $config_file is not a YAML mapping, using default values\n" if defined $config;
   $config = {};
   }
$config->{'nagios-server'}                ||= {};
$config->{'nagios-server'}{'status-file'} ||= '/var/cache/nagios3/status.dat';
$config->{'nagios-server'}{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$config->{'nagios-server'}{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$config->{'nagios-server'}{'status-cgi'}  ||= "$cgi_path/status.cgi";
$config->{'nagios-server'}{'stylesheets'} ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/stylesheets}r;
$config->{'host-mapping'}                 ||= {};
$config->{'color-downtime'}               ||= {};
# Defined-or keeps a configured 0 for day-min and factor.  day-max is used as a
# divisor in alertcolor(), so a false value (0, empty) still gets the default.
$config->{'color-downtime'}{'day-min'}    //=  3;
$config->{'color-downtime'}{'day-max'}    ||= 50;
$config->{'color-downtime'}{'factor'}     //=  0.7;
$config->{'remote-action'}                ||= {};

# Parse the Nagios status file once; all host and service data come from it.
my $log = Nagios::StatusLog->new(
   Filename => $config->{'nagios-server'}{'status-file'},
   Version  => 3.0
   );
51
# Translate a host name through the 'host-mapping' section of the configuration.
# Returns the mapped name when the host has an entry, the name itself otherwise.
sub hostmapping {
   my ($host) = @_;

   my $mapping = $config->{'host-mapping'};
   return $mapping->{$host} if exists $mapping->{$host};
   return $host;
   }
57
# Return how long ago $time_change (epoch seconds) happened, expressed in days
# and formatted with one decimal.  The result is printed with a "days" label
# and compared against the day-min / day-max settings, so the divisor has to
# be the number of seconds in one day (24 * 3600), not 60 * 3600.
sub downtime {
   my ($time_change) = @_;

   my $seconds_per_day = 24 * 3600;
   my $now = time;
   return sprintf '%.1f', ($now - $time_change) / $seconds_per_day;
   }
64
# Lighten $color according to how long the problem has lasted.
# $downtime is first reduced by 'day-min', then clamped to the range
# 0 .. 'day-max'; the lightening ratio handed to Color::Calc is that value
# scaled by 'factor' and divided by 'day-max'.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $scale   = $config->{'color-downtime'};
   my $day_max = $scale->{'day-max'};

   my $age = $downtime - $scale->{'day-min'};   # same color first days
   $age = $day_max if $age > $day_max;          # cap at day-max
   $age = 0        if $age < 0;

   return Color::Calc::color_light_html($color, ($age * $scale->{'factor'}) / $day_max);
   }
75
# Classification of every host found in the status log:
#   %hostdown        host name => host status object, for hosts reported DOWN
#                    on which no service is OK or PENDING
#   @serviceproblems service status objects whose status is not OK
#   %hostcount       host name => number of entries it has in @serviceproblems
#   @futurecheck     filled later with delayed external commands
my %hostdown;
my @serviceproblems;
my %hostcount;
my @futurecheck;
HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);
   my @services  = $log->list_services_on_host($host);

   if ($host_stat->status eq 'DOWN') {
      # A DOWN host with at least one OK or PENDING service is handled
      # service by service below instead of being listed as down.
      my $answering = 0;
      for my $srv (@services) {
         my $srv_status = $log->service($host, $srv)->status;
         next if $srv_status ne 'OK' and $srv_status ne 'PENDING';
         $answering = 1;
         last;
         }

      if (not $answering) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   for my $srv (@services) {
      my $srv_stat = $log->service($host, $srv);
      next if $srv_stat->status eq 'OK';
      push @serviceproblems, $srv_stat;
      $hostcount{$host}++;
      }
   }
100
# Timestamp shown next to the UPDATE link (local time, minute resolution).
# $now is also the base time of the forced checks scheduled further down.
my $now = time;
my ($min, $hour, $mday, $mon, $year) = (localtime $now)[1 .. 5];
my $date = sprintf '%04i-%02i-%02i %02i:%02i', $year + 1900, $mon + 1, $mday, $hour, $min;

# The whole response (CGI header included) is accumulated in $htmlpage
# and printed in one go at the end of the script.
my $htmlpage = <<"ENDH";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet" type="text/css" href="$config->{'nagios-server'}{'stylesheets'}/velvice.css">
</head>
<body>
<h1>
 <ul>
   <li>Nagios Velvice Alert Panel : <a href="$config->{'nagios-server'}{'portal-url'}">Core Server</a></li>
   <li><small>(<a href="$cgi_script_name">UPDATE</a> - $date)</small></li>
 </ul>
</h1>
ENDH
125
# Number of problems per service description and per status,
# used to build the "recheck" links at the top of the page.
my %service_name;
my %service_status;
$service_name{$_->service_description}++ for @serviceproblems;
$service_status{$_->status}++            for @serviceproblems;
132
# Body of the page: recheck links, table of failing services (scheduling
# forced checks when the "check" parameter asks for it), table of hosts that
# are down, and the remote commands suggested by the 'remote-action' config.
if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {
   # Requested recheck target, lower-cased once.  A missing parameter is the
   # empty string, which matches nothing.
   my $check = lc($cgi_check // '');

   # Escape only the HTML-significant characters, so that non-ASCII bytes in
   # host or service names are passed through unchanged.
   my $html_escape = sub { return HTML::Entities::encode($_[0] // '', q{<>&"'}) };

   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      " <a href='$cgi_script_name?check=all'>ALL</a><small>(" . scalar(@serviceproblems) . ')</small>',
      map(" <a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>" . $html_escape->($_) . "</a>($service_status{$_})", sort keys %service_status);
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n", map(" <a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>" . $html_escape->($_) . "</a><small>($service_name{$_})</small>", sort keys %service_name);
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   # Handle on the Nagios external command file.  It is opened lazily, the
   # first time a check really has to be scheduled, so that simply viewing
   # the page never touches the command pipe.
   my $nagios_cmd;

   my %remote_sshdown = ();   # depend pattern => { host => count } of hosts whose depend service is failing
   my %remote_db      = ();   # action name    => [ hosts on which the action applies ]
   my $remote_flag;           # true as soon as one remote action applies

   my $current_host  = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # Drop the leading "STATUS:" / "STATUS -" tag of the plugin output.
      my $output   = HTML::Entities::encode($srv->plugin_output // '') =~ s/^[A-Z_\s]+?[:-]//r;

      my $html_host    = $html_escape->($hostname);
      my $html_service = $html_escape->($service);

      my $color = $status eq 'CRITICAL' ? '#F88888' : '#FFFF00';
      $color = alertcolor($color, $downtime);
      $htmlpage .= " <tr style='background:$color;'>\n";
      if ($hostname ne $current_host) {
         # First row of a host: one cell spanning all its failing services.
         $current_host = $hostname;
         $htmlpage .= "  <td rowspan='$hostcount{$hostname}' style='vertical-align:middle;'><a href=\"$config->{'nagios-server'}{'status-cgi'}?host=$html_host\">$html_host</a></td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $action    = $config->{'remote-action'}{$act_name};
         my $act_regex = $action->{'regex'};
         # An empty pattern would make Perl reuse the last successful regex.
         next ACTION_STYLE if not defined $act_regex or $act_regex eq '';
         $bold++ if $service =~ m/$act_regex/ and ($action->{'style'} // '') eq 'bold';
         }
      $htmlpage .= $bold ? '  <td class="bold">' : '  <td>';
      $htmlpage .= "$html_service</td>\n";

      $htmlpage .= "  <td>$status</td>\n";
      $htmlpage .= "  <td style='max-width:60%;'><small>$output";

      # The links above send "all", a lower-cased status or a lower-cased
      # service name.  Compare for equality: a substring / regex match would
      # recheck everything for a service called e.g. "Firewall" and would
      # break on service names containing regex metacharacters.
      if ($check eq 'all' or $check eq lc $service or $check eq lc $status) {
         if (not $nagios_cmd) {
            my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
            open $nagios_cmd, '>>', $cmd_file or die "Can't open file $cmd_file: $!";
            }

         $now++;   # one second more per scheduled check, to spread them out
         my $interval = $srv->next_check() - $srv->last_check() || 300;
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20)); # 5 * 60 = 300

         $htmlpage .= " -- <b>CHECK</b> [$now/" . ($future - $now) . "]";
         printf {$nagios_cmd} "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $action     = $config->{'remote-action'}{$act_name};
         my $act_regex  = $action->{'regex'};
         next ACTION_PUSH_AND_DEPEND if not defined $act_regex or $act_regex eq '';
         my $act_status = $action->{'status'} || 'ALL';
         my $act_depend = $action->{'depend'} || 'SSH';

         if ($service =~ m/$act_regex/ and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # check depend service otherwise
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   close $nagios_cmd if $nagios_cmd;

   # host down
   # NOTE(review): this table lives in the else branch, so it is only shown
   # when at least one service problem exists - confirm this is intended.
   if (%hostdown) {
      $htmlpage .= "<br />\n";
      $htmlpage .= "<table border='1'>\n";
      HOST_DOWN:
      for my $host (sort keys %hostdown) {
         my $host_stat = $hostdown{$host};
         my $html_host = $html_escape->($host_stat->host_name);
         my $downtime  = downtime($host_stat->last_state_change);
         my $color     = alertcolor('#F88888', $downtime);
         $htmlpage .= " <tr style='background:$color'>\n";
         $htmlpage .= "  <td><a href=\"$config->{'nagios-server'}{'status-cgi'}?host=$html_host\">$html_host</a></td>\n";
         my @host_service;
         for my $srv ($log->list_services_on_host($host)) {
            push @host_service, $html_escape->($log->service($host, $srv)->service_description);
            }
         $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
         $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
         $htmlpage .= " </tr>\n";
         }
      $htmlpage .= "</table>\n";
      }

   # remote action
   if ($remote_flag) {
      require Nagios::Object::Config;
      my $parser = Nagios::Object::Config->new();
      $parser->parse("/var/cache/nagios3/objects.cache");

      REMOTE_ACTION:
      for my $act_name (sort keys %remote_db) {
         my $action     = $config->{'remote-action'}{$act_name};
         my $act_depend = $action->{'depend'} || 'SSH';

         # Keep each host once, and only when the service the action depends
         # on is not itself failing on that host.
         my %seen;
         my @action = grep { !exists $remote_sshdown{$act_depend}->{$_} && !$seen{$_}++ } @{$remote_db{$act_name}};
         next REMOTE_ACTION if not @action;

         my $srv_title = $action->{'title'} || "Action: $act_name";
         $htmlpage .= "<h2>$srv_title</h2>\n";
         $htmlpage .= "<pre>\n";
         my $remote_action = $action->{'command'};
         $remote_action = $action->{'command-one'}
            if @action == 1 and exists $action->{'command-one'};
         $remote_action //= '';

         my @hosts;
         for my $host (@action) {
            # Fall back on the Nagios host name when the object cache does
            # not know the host or gives it no address.
            my $object  = $parser->find_object("$host", "Nagios::Host");
            my $address = $object ? $object->address : undef;
            $address = $host if not defined $address or $address eq '';
            # Keep the short name only (everything before the first dot).
            # NOTE(review): a numeric IP address is cut to its first octet.
            push @hosts, hostmapping($address =~ s/\..*$//r);
            }
         my $hosts_list = join ' ', @hosts;
         # The command is shown for copy and paste only, so escape it for HTML.
         $htmlpage .= ' ' . $html_escape->($remote_action =~ s{\%m}{$hosts_list}r);
         $htmlpage .= "</pre>\n";
         }
      }
   }
282
# Page footer, then emission of the complete response.
$htmlpage .= <<"ENDH";
<hr clear="all">
<b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a> - version: $VERSION</b>
(<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
<ul>
 <li>Written by Gabriel Moreau</li>
 <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
 <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
</ul>
</body>
</html>
ENDH

print $htmlpage;

# delayed future check
# The second batch of forced checks is written to the Nagios command file
# after a short pause, once the page itself has been produced.
if (@futurecheck) {
   sleep 2;
   my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   # Report the real path: the message used to contain the literal word "filename".
   open my $nagios_cmd, '>>', $cmd_file or die "Can't open file $cmd_file: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   # Buffered write errors only show up at close time.
   close $nagios_cmd or warn "Can't close file $cmd_file: $!";
   }
306
307__END__
308
309
310=head1 NAME
311
312velvice.cgi - nagios velvice alert panel
313
314=head1 USAGE
315
316 velvice.cgi
317 velvice.cgi?check=XXX
318
319
320=head1 DESCRIPTION
321
322Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
323
324The Nagios web page is sometimes very graphically charged
325and does not necessarily contain the information you need at a glance.
326For example, it is quite complicated to restart controls on multiple hosts in one click.
327
328For example, a server that is down should take only one line and not one per service...
329Similarly, a service that has been down for 5 minutes or since yesterday
330has more weight than a service that has fallen for 15 days.
331
332With Velvice Panel, a broken down server takes only one line.
Services that have been failing for a long time gradually lose their color and become pastel colors.
334
335With Velvice Panel, it is possible through a single click
336to redo a check of all services that are in the CRITICAL state.
337Similarly, it is possible to restart a check on all SSH services in breakdowns ...
338In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
339
340There is also a link to the web page of the main Nagios server.
341For each computer, you have a direct link to its dedicated web page on this server.
342
343
344=head1 CONFIGURATION FILE SPECIFICATION
345
346The configuration file must be F</etc/nagios3/velvice.yml>.
347This is not a required file.
348The file is in YAML format because this is a human-readable text file style.
349Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
350
351You can find in the software nagios-velvice an example of configuration:
352L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
353This one is in fact the master reference specification!
354
355The main keys C<nagios-server> and C<color-downtime> have good default values.
356No secondary key is required...
The Velvice script tries hard to replace ~ with the right value automatically.
358
359 nagios-server:
360   status-file: /var/cache/nagios3/status.dat
361   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
362   portal-url:  ~/nagios3/
363   status-cgi:  ~/cgi-bin/nagios3/status.cgi
364   stylesheets: ~/nagios3/stylesheets
365
366The background color of the faulty service line display remains stable with a bright color for at least 3 days.
367Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
368
369 color-downtime:
370   day-min:  3
371   day-max: 50
372   factor:   0.7
373
374With key C<host-mapping>,
375it's good to map C<localhost> to the real name of the computer (hostname).
376
377 host-mapping:
378   localhost:  srv-nagios
379   toto:       titi
380
381The only important key is C<remote-action>.
382You can affiliate as many subkeys as you want.
383Let's take an example:
384
385 remote-action:
386   oom-killer:
387     regex: ^OOM Killer
388     title:  OOM Killer
389     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
390     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
391     depend: ^SSH
392     status: ALL
393     style: bold
394
395C<oom-killer> is just a key for your remote action.
396The regex is used to find which service has a problem...
The title is used in the result web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
398The C<command> is just written on this web page.
It is your responsibility to copy and paste it into a terminal.
400For security reasons, the nagios server does not have the right to launch the command on the remote host.
401The wildcard C<%m> is replaced by the list of the host (separated by the space).
Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
403If your command is based on SSH,
404you can have an SSH action only if the remote SSH is running.
405So you can make the remote action depend on the SSH service through a regular expression of your choice.
406
407The last two keys.
408The C<status> key is for CRITICAL or WARNING (or ALL).
409The key C<style> is there to mark in bold the service in error on the web page.
410
411=head1 SEE ALSO
412
413yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
414
415In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
416
417=over
418
419=item * C<yamllint> - Linter for YAML files (Python)
420
421=item * C<libyaml-shell-perl> - YAML test shell (Perl)
422
423=back
424
425
Project resources:
427
428=over
429
430=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
431
432=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
433
434=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
435
436=back
437
438
439=head1 VERSION
440
441$Id: velvice.cgi 273 2018-06-26 18:47:26Z g7moreau $
442
443
444=head1 AUTHOR
445
446Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
447
448
449=head1 LICENSE AND COPYRIGHT
450
451Licence GNU GPL version 2 or later and Perl equivalent
452
453Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.