source: trunk/nagios-velvice/velvice.cgi @ 275

Last change on this file since 275 was 275, checked in by g7moreau, 6 years ago
  • Add image in html manual
  • Property svn:keywords set to Id
File size: 16.4 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.7.9');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# Read the optional "check" request parameter and derive the URL paths used
# to build links.
my $query           = CGI->new();
# param() is forced into scalar context: inside an argument list it would be
# evaluated in list context and disappear when the parameter is absent.
# Defaulting to '' keeps $cgi_check defined for the matches done further down.
my $cgi_check       = uri_decode(scalar($query->param('check')) // '');
my $cgi_script_name = $query->script_name();
# Directory part of the script URL (the trailing "/name.cgi" is removed).
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r;
undef $query;
31
# Load the optional YAML configuration, then fill in every missing key with
# its default value.
my $config_file = '/etc/nagios3/velvice.yml';
my $config      = {};
$config = YAML::Syck::LoadFile($config_file) if -e $config_file;
$config->{'nagios-server'}                ||= {};
$config->{'nagios-server'}{'status-file'} ||= '/var/cache/nagios3/status.dat';
$config->{'nagios-server'}{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$config->{'nagios-server'}{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$config->{'nagios-server'}{'status-cgi'}  ||= "$cgi_path/status.cgi";
$config->{'nagios-server'}{'stylesheets'} ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/stylesheets}r;
$config->{'host-mapping'}                 ||= {};
$config->{'color-downtime'}               ||= {};
# Defined-or: an explicit 0 is a legitimate setting for these two keys and
# must not be replaced by the default.
$config->{'color-downtime'}{'day-min'}    //=  3;
# day-max keeps ||= on purpose: it is used as a divisor in alertcolor().
$config->{'color-downtime'}{'day-max'}    ||= 50;
$config->{'color-downtime'}{'factor'}     //=  0.7;
$config->{'remote-action'}                ||= {};

# Parsed view of the Nagios status file (hosts and services).
my $log = Nagios::StatusLog->new(
   Filename => $config->{'nagios-server'}{'status-file'},
   Version  => 3.0
   );
51
# Translate a host name through the 'host-mapping' table of the configuration.
# A name without an entry in the table is returned unchanged.
sub hostmapping {
   my ($host) = @_;

   my $mapping = $config->{'host-mapping'};
   return $mapping->{$host} if exists $mapping->{$host};
   return $host;
   }
57
# Return the time elapsed since $time_change (epoch seconds) as a number of
# days, formatted with one decimal (for example '1.5').
# The value is displayed with the unit "days" and compared with the 'day-min'
# and 'day-max' settings, so the divisor must be the number of seconds in a day.
sub downtime {
   my ($time_change) = @_;

   my $seconds_per_day = 24 * 3600;
   return sprintf '%.1f', (time - $time_change) / $seconds_per_day;
   }
64
# Lighten $color according to how long the problem has lasted.
# $downtime is first reduced by 'day-min', then capped at 'day-max' and floored
# at 0; the result is scaled by 'factor' / 'day-max' and handed to
# Color::Calc::color_light_html together with the base color.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $scale   = $config->{'color-downtime'};
   my $day_max = $scale->{'day-max'};

   my $shifted = $downtime - $scale->{'day-min'};            # same color first days
   my $capped  = $shifted > $day_max ? $day_max : $shifted;  # no further fading past day-max
   my $fading  = $capped < 0 ? 0 : $capped;

   my $factor = ($fading * $scale->{'factor'}) / $day_max;
   return Color::Calc::color_light_html($color, $factor);
   }
75
# Walk every host of the status log and sort its problems into:
#   %hostdown        host name => host status object, for hosts reported DOWN
#                    on which no service is OK or PENDING
#   @serviceproblems status objects of every service that is not OK
#   %hostcount       host name => number of its entries in @serviceproblems
# @futurecheck is filled later with the delayed check commands.
my (%hostdown, %hostcount);
my (@serviceproblems, @futurecheck);
HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);
   my @services  = map { $log->service($host, $_) } $log->list_services_on_host($host);

   if ($host_stat->status eq 'DOWN') {
      # A DOWN host that still has a service OK or PENDING is listed service
      # by service, like any other host.
      my $alive = grep { $_->status eq 'OK' or $_->status eq 'PENDING' } @services;
      if (not $alive) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   for my $srv_stat (@services) {
      next if $srv_stat->status eq 'OK';
      push @serviceproblems, $srv_stat;
      $hostcount{$host}++;
      }
   }
100
# Timestamp displayed next to the UPDATE link.  $now is also the base time of
# the forced checks scheduled further down.
my $now  = time;
my @when = localtime $now;   # (sec, min, hour, mday, mon, year, ...)
my $date = sprintf '%04i-%02i-%02i %02i:%02i', $when[5] + 1900, $when[4] + 1, @when[3, 2, 1];

# HTTP header followed by the top of the HTML page.
my $htmlpage = <<"ENDH";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet" type="text/css" href="$config->{'nagios-server'}{'stylesheets'}/velvice.css">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$config->{'nagios-server'}{'portal-url'}">Core Server</a></li>
    <li><small>(<a href="$cgi_script_name">UPDATE</a> - $date)</small></li>
  </ul>
 </h1>
</div>
ENDH
127
# Number of failing services per service description and per status; both
# tables feed the recheck links at the top of the page.
my (%service_name, %service_status);
for my $problem (@serviceproblems) {
   $service_name{ $problem->service_description }++;
   $service_status{ $problem->status }++;
   }
134
# Page body: recheck links, table of failing services, table of down hosts and
# suggested remote actions.  Forced service checks are written to the Nagios
# command file when the "check" request parameter selects a service.

# Escape a value for use in HTML text or inside a quoted attribute.  Only the
# markup characters are converted, other bytes are passed through untouched.
my $html = sub { return HTML::Entities::encode($_[0] // '', q{<>&"'}) };

# Link to the status page of one host on the Nagios server.
my $host_link = sub {
   my ($name) = @_;
   my $url = "$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($name);
   return '<a href="' . $html->($url) . '">' . $html->($name) . '</a>';
   };

# Requested selector, lower-cased once; '' when no check was requested.
my $check = lc($cgi_check // '');

my %remote_sshdown = ();   # depend regex => { host => count } of failing dependency services
my %remote_db      = ();   # action name  => [ hosts on which the action applies ]
my $remote_flag;           # true as soon as one remote action applies

if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {
   # One recheck link; the selector is URI-encoded and lower-cased.
   my $recheck_link = sub {
      my ($selector, $label) = @_;
      my $url = "$cgi_script_name?check=" . lc(uri_encode($selector));
      return " <a href='" . $html->($url) . "'>" . $html->($label) . '</a>';
      };

   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      $recheck_link->('all', 'ALL') . '<small>(' . scalar(@serviceproblems) . ')</small>',
      map { $recheck_link->($_, $_) . "($service_status{$_})" } sort keys %service_status;
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n",
      map { $recheck_link->($_, $_) . "<small>($service_name{$_})</small>" } sort keys %service_name;
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $cmd_file or die "Can't open Nagios command file $cmd_file: $!";

   my $current_host = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # Drop the leading upper-case "STATUS:" / "STATUS -" tag of the plugin output.
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      my $color = alertcolor($status eq 'CRITICAL' ? '#F88888' : '#FFFF00', $downtime);
      $htmlpage .= " <tr style='background:$color;'>\n";
      if ($hostname ne $current_host) {
         # First row of a host: one cell spanning all its failing services.
         $current_host = $hostname;
         $htmlpage .= "  <td rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>" . $host_link->($hostname) . "</td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $action    = $config->{'remote-action'}{$act_name} || {};
         my $act_regex = $action->{'regex'};
         # An undefined or empty pattern must not be matched: an empty regex
         # silently reuses the last successful pattern.
         next ACTION_STYLE if not defined $act_regex or $act_regex eq '';
         $bold++ if $service =~ m/$act_regex/ and ($action->{'style'} // '') eq 'bold';
         }
      $htmlpage .= $bold ? '  <td class="bold">' : '  <td>';
      $htmlpage .= $html->($service) . "</td>\n";

      $htmlpage .= "  <td>" . $html->($status) . "</td>\n";
      $htmlpage .= "  <td style='max-width:60%;'><small>$output";

      # The selector must be exactly "all", the service description or the
      # status level (case-insensitive).  Exact comparison means a service
      # such as "Firewall" no longer triggers a recheck of everything, names
      # holding regex metacharacters are safe, and every level offered as a
      # link (UNKNOWN included) is honoured.
      if ($check eq 'all' or $check eq lc($service) or $check eq lc($status)) {
         $now++;   # successive immediate checks are spread one second apart
         my $interval = $srv->next_check() - $srv->last_check() || 300;
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20)); # 5 * 60 = 300

         $htmlpage .= " -- <b>CHECK</b> [$now/" . ($future - $now) . "]";
         printf {$nagios_cmd} "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $action     = $config->{'remote-action'}{$act_name} || {};
         my $act_regex  = $action->{'regex'};
         my $act_status = $action->{'status'} || 'ALL';
         my $act_depend = $action->{'depend'} || 'SSH';

         if (defined $act_regex and $act_regex ne ''
               and $service =~ m/$act_regex/
               and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # check depend service otherwise
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   close $nagios_cmd or warn "Can't close Nagios command file $cmd_file: $!";
   }

# host down
# Rendered outside the branch above so that down hosts are still listed when
# they are the only problem (their services are not in @serviceproblems).
if (%hostdown) {
   $htmlpage .= "<br />\n";
   $htmlpage .= "<table border='1'>\n";
   HOST_DOWN:
   for my $host (sort keys %hostdown) {
      my $host_stat = $hostdown{$host};
      my $hostname  = $host_stat->host_name;
      my $downtime  = downtime($host_stat->last_state_change);
      my $color     = alertcolor('#F88888', $downtime);
      $htmlpage .= " <tr style='background:$color'>\n";
      $htmlpage .= "  <td>" . $host_link->($hostname) . "</td>\n";
      my @host_service;
      for my $srv ($log->list_services_on_host($host)) {
         push @host_service, $html->($log->service($host, $srv)->service_description);
         }
      $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }
   $htmlpage .= "</table>\n";
   }

# remote action
if ($remote_flag) {
   require Nagios::Object::Config;
   my $parser = Nagios::Object::Config->new();
   $parser->parse("/var/cache/nagios3/objects.cache");

   REMOTE_ACTION:
   for my $act_name (keys %remote_db) {
      my $action     = $config->{'remote-action'}{$act_name} || {};
      my $act_depend = $action->{'depend'} || 'SSH';

      # Keep each host once, and only when its dependency service is not failing.
      my %seen;
      my @action = grep { not $seen{$_}++ and not exists $remote_sshdown{$act_depend}->{$_} } @{$remote_db{$act_name}};
      next REMOTE_ACTION if not @action;

      my $remote_action = $action->{'command'};
      $remote_action = $action->{'command-one'}
         if @action == 1 and exists $action->{'command-one'};
      # Nothing to display when the action has no command configured.
      next REMOTE_ACTION if not defined $remote_action;

      my $srv_title = $action->{'title'} || "Action: $act_name";
      $htmlpage .= "<h2>$srv_title</h2>\n";
      $htmlpage .= "<pre>\n";
      my @hosts;
      for my $host (@action) {
         my $object = $parser->find_object("$host", "Nagios::Host");
         # Fall back on the Nagios host name when the object cannot be resolved.
         my $address = $object ? $object->address : $host;
         # NOTE(review): everything after the first dot is dropped, which
         # presumably expects a DNS name rather than an IP address - confirm.
         push @hosts, hostmapping($address =~ s/\..*$//r);
         }
      my $hosts_list = join ' ', @hosts;
      # The command is shown as text: escape it so that &, < and > survive.
      $htmlpage .= ' ' . $html->($remote_action =~ s{\%m}{$hosts_list}r);
      $htmlpage .= "</pre>\n";
      }
   }
284
# Page footer (project links, version, licence), then the whole page is sent
# to the client in one go.
my $footer = <<"ENDH";
<div class="footer">
 <hr clear="all">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
ENDH
$htmlpage .= $footer;

print $htmlpage;
302
# delayed future check
# The second round of forced checks (the ones carrying a later timestamp) is
# appended to the Nagios command file once the page has been printed.
if (@futurecheck) {
   sleep 2;
   my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $cmd_file or die "Can't open Nagios command file $cmd_file: $!";
   for my $command (@futurecheck) {
      print {$nagios_cmd} "$command\n" or die "Can't write to Nagios command file $cmd_file: $!";
      }
   # Buffered write errors only show up when the handle is closed.
   close $nagios_cmd or die "Can't close Nagios command file $cmd_file: $!";
   }
311
312__END__
313
314
315=head1 NAME
316
317velvice.cgi - nagios velvice alert panel
318
319=head1 USAGE
320
321 velvice.cgi
322 velvice.cgi?check=XXX
323
324
325=head1 DESCRIPTION
326
327=begin html
328
329<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />
330
331=end html
332
333Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
334
335The Nagios web page is sometimes very graphically charged
336and does not necessarily contain the information you need at a glance.
337For example, it is quite complicated to restart controls on multiple hosts in one click.
338
339For example, a server that is down should take only one line and not one per service...
340Similarly, a service that has been down for 5 minutes or since yesterday
341has more weight than a service that has fallen for 15 days.
342
343With Velvice Panel, a broken down server takes only one line.
344Services that have been falling for a long time gradually lose their color and become pastel colors.
345
346With Velvice Panel, it is possible through a single click
347to redo a check of all services that are in the CRITICAL state.
348Similarly, it is possible to restart a check on all SSH services in breakdowns ...
349In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
350
351There is also a link to the web page of the main Nagios server.
352For each computer, you have a direct link to its dedicated web page on this server.
353
354
355=head1 CONFIGURATION FILE SPECIFICATION
356
357The configuration file must be F</etc/nagios3/velvice.yml>.
358This is not a required file.
359The file is in YAML format because this is a human-readable text file style.
360Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
361
362You can find in the software nagios-velvice an example of configuration:
363L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
364This one is in fact the master reference specification!
365
366The main keys C<nagios-server> and C<color-downtime> have good default values.
367No secondary key is required...
The Velvice script tries hard to replace ~ with the correct value automatically.
369
370 nagios-server:
371   status-file: /var/cache/nagios3/status.dat
372   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
373   portal-url:  ~/nagios3/
374   status-cgi:  ~/cgi-bin/nagios3/status.cgi
375   stylesheets: ~/nagios3/stylesheets
376
377The background color of the faulty service line display remains stable with a bright color for at least 3 days.
378Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
379
380 color-downtime:
381   day-min:  3
382   day-max: 50
383   factor:   0.7
384
385With key C<host-mapping>,
386it's good to map C<localhost> to the real name of the computer (hostname).
387
388 host-mapping:
389   localhost:  srv-nagios
390   toto:       titi
391
392The only important key is C<remote-action>.
393You can affiliate as many subkeys as you want.
394Let's take an example:
395
396 remote-action:
397   oom-killer:
398     regex: ^OOM Killer
399     title:  OOM Killer
400     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
401     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
402     depend: ^SSH
403     status: ALL
404     style: bold
405
406C<oom-killer> is just a key for your remote action.
407The regex is used to find which service has a problem...
The title is used in the resulting web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
409The C<command> is just written on this web page.
It is your responsibility to copy and paste it into a terminal.
411For security reasons, the nagios server does not have the right to launch the command on the remote host.
The wildcard C<%m> is replaced by the list of hosts (separated by spaces).
Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
414If your command is based on SSH,
415you can have an SSH action only if the remote SSH is running.
416So you can make the remote action depend on the SSH service through a regular expression of your choice.
417
418The last two keys.
419The C<status> key is for CRITICAL or WARNING (or ALL).
420The key C<style> is there to mark in bold the service in error on the web page.
421
422=head1 SEE ALSO
423
424yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
425
426In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
427
428=over
429
430=item * C<yamllint> - Linter for YAML files (Python)
431
432=item * C<libyaml-shell-perl> - YAML test shell (Perl)
433
434=back
435
436
Own project resources:
438
439=over
440
441=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
442
443=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
444
445=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
446
447=back
448
449
450=head1 VERSION
451
452$Id: velvice.cgi 275 2018-07-03 11:54:15Z g7moreau $
453
454
455=head1 AUTHOR
456
457Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
458
459
460=head1 LICENSE AND COPYRIGHT
461
462Licence GNU GPL version 2 or later and Perl equivalent
463
464Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.