source: trunk/nagios-velvice/velvice.cgi @ 280

Last change on this file since 280 was 280, checked in by g7moreau, 6 years ago
  • more fat design
  • Property svn:keywords set to Id
File size: 16.9 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.8.1');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# Read what is needed from the CGI request, then release the CGI object.
my $query           = CGI->new();
# scalar() forces param() into scalar context: the arguments of a function
# call are evaluated in list context, where param() can return several values.
my $cgi_check       = uri_decode(scalar $query->param('check'));
# No "check" parameter means nothing to recheck; the empty string keeps the
# later string and regex comparisons free of "uninitialized value" warnings.
$cgi_check          = '' if not defined $cgi_check;
my $cgi_script_name = $query->script_name();
# Directory part of the script URL (the trailing "/name.cgi" is removed).
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r;
undef $query;
31
# Load the optional YAML configuration file, then fill in a default for every
# key the rest of the script reads.
my $config_file = '/etc/nagios3/velvice.yml';
my $config = {};
$config = YAML::Syck::LoadFile($config_file) if -e $config_file;
# An empty file or a non-mapping document cannot be used as a hash of settings.
$config = {} if ref $config ne 'HASH';
$config->{'nagios-server'}                ||= {};
$config->{'nagios-server'}{'status-file'} ||= '/var/cache/nagios3/status.dat';
$config->{'nagios-server'}{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$config->{'nagios-server'}{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$config->{'nagios-server'}{'status-cgi'}  ||= "$cgi_path/status.cgi";
$config->{'nagios-server'}{'stylesheets'} ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/stylesheets}r;
$config->{'host-mapping'}                 ||= {};
$config->{'color-downtime'}               ||= {};
# day-min and factor may legitimately be 0, so only an undefined value gets the default.
$config->{'color-downtime'}{'day-min'}    //=  3;
# day-max is used as a divisor in alertcolor(): a 0 value must be replaced too.
$config->{'color-downtime'}{'day-max'}    ||= 50;
$config->{'color-downtime'}{'factor'}     //=  0.7;
$config->{'remote-action'}                ||= {};

# Parsed Nagios status file, queried below for hosts and services.
my $log = Nagios::StatusLog->new(
   Filename => $config->{'nagios-server'}{'status-file'},
   Version  => 3.0
   );
51
# hostmapping($host)
# Returns the replacement name configured under the 'host-mapping' key for
# $host, or $host itself when no entry exists for it.
sub hostmapping {
   my ($name) = @_;

   if (exists $config->{'host-mapping'}{$name}) {
      return $config->{'host-mapping'}{$name};
      }

   return $name;
   }
57
# downtime($time_change)
# Returns the time elapsed since the epoch timestamp $time_change, expressed
# in days and formatted with one decimal (e.g. '1.5').
# The result is printed as "... days" and compared with the day-min / day-max
# settings, so the divisor has to be the number of seconds in one day.
sub downtime {
   my ($time_change) = @_;

   my $seconds_per_day = 24 * 3600;
   return sprintf '%.1f', (time() - $time_change) / $seconds_per_day;
   }
64
# alertcolor($color, $downtime)
# Lightens the HTML color $color according to $downtime (in days).
# The first 'day-min' days do not count, and the remaining age is clamped to
# the range 0 .. 'day-max'. The lightening factor grows linearly with that
# age and reaches 'factor' at 'day-max'.
# Returns the value given by Color::Calc::color_light_html.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $day_min = $config->{'color-downtime'}{'day-min'};
   my $day_max = $config->{'color-downtime'}{'day-max'};
   my $fading  = $config->{'color-downtime'}{'factor'};

   my $age = $downtime - $day_min;  # same color during the first days
   $age = $day_max if $age > $day_max;
   $age = 0        if $age < 0;

   return Color::Calc::color_light_html($color, ($age * $fading) / $day_max);
   }
75
# Results of the scan of the status log:
my %hostdown;         # host name => host status object, for hosts reported as one line
my @serviceproblems;  # status objects of every service that is not OK
my %hostcount;        # host name => number of entries pushed to @serviceproblems
my @futurecheck;      # Nagios commands written after the page is printed
HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);

   if ($host_stat->status eq 'DOWN') {
      # A DOWN host is only reported as down when none of its services
      # is OK or PENDING; otherwise its services are listed one by one.
      my $has_live_service = 0;
      for my $srv ($log->list_services_on_host($host)) {
         my $state = $log->service($host, $srv)->status;
         if ($state eq 'OK' or $state eq 'PENDING') {
            $has_live_service = 1;
            last;
            }
         }

      if (not $has_live_service) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   for my $srv ($log->list_services_on_host($host)) {
      my $service = $log->service($host, $srv);
      next if $service->status eq 'OK';

      push @serviceproblems, $service;
      $hostcount{$host}++;
      }
   }
100
# Current time: $now is also the base timestamp of the forced checks,
# $date is the "YYYY-MM-DD HH:MM" label shown in the page header.
my $now = time;
my @date_fields = (localtime $now)[5, 4, 3, 2, 1];  # year, month, day, hour, minute
$date_fields[0] += 1900;                            # localtime years count from 1900
$date_fields[1] += 1;                               # localtime months start at 0
my $date = sprintf '%04i-%02i-%02i %02i:%02i', @date_fields;
106
# Start of the response: HTTP header, HTML head and the page banner.
# The banner links to the Nagios portal and to this script itself (refresh).
my $htmlpage = do {
   my $server = $config->{'nagios-server'};
   <<"ENDH";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet" type="text/css" href="$server->{'stylesheets'}/velvice.css">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$server->{'portal-url'}">Core Server</a></li>
    <li><small><a id="refresh" href="$cgi_script_name">$date</a></small></li>
  </ul>
 </h1>
</div>
ENDH
   };
127
# Count the service problems per service description and per status;
# both tables feed the "recheck" links at the top of the panel.
my %service_name   = ();
my %service_status = ();
for my $problem (@serviceproblems) {
   my $description = $problem->service_description;
   my $state       = $problem->status;
   $service_name{$description} += 1;
   $service_status{$state}     += 1;
   }
134
# Main body of the panel, appended to $htmlpage in this order:
#  1. the recheck links and the table of service problems (forced checks are
#     written to the Nagios command file while the table is built),
#  2. the table of hosts that are down,
#  3. the remote action commands to copy into a terminal.

# Recheck target asked by the "check" parameter, lower-cased so it can be
# compared with the lower-cased links generated below. Exact comparison is
# used on purpose: a substring or regex match would make a host such as
# "firewall" select ALL, and a service name holding regex metacharacters
# would either never match or abort the script.
my $recheck_target = lc(defined $cgi_check ? $cgi_check : '');

# Filled while walking the service problems, used by the remote action section.
my %remote_sshdown = ();   # depend regex => { host => count of matching failed services }
my %remote_db      = ();   # action name  => [ hosts concerned by the action ]
my $remote_flag;           # true as soon as one action has at least one host

if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {

   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      " <a href='$cgi_script_name?check=all'>ALL</a><small>(" . scalar(@serviceproblems) . ')</small>',
      map(" <a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>$_</a>($service_status{$_})", sort keys %service_status);
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n",
      map(" <a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>$_</a><small>($service_name{$_})</small>", sort keys %service_name);
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $nagios_cmd_file or die "Can't open file $nagios_cmd_file: $!";

   my $current_host  = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # drop the leading upper-case "STATUS:" / "STATUS -" prefix of the plugin output
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      my $color = $status eq 'CRITICAL' ? '#F88888' : '#FFFF00';
      $color = alertcolor($color, $downtime);
      $htmlpage .= " <tr style='background:$color;'>\n";
      if ($hostname ne $current_host) {
         # first row of a host: the host cells span all the rows of this host
         $current_host = $hostname;
         $htmlpage .= "  <td rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>"
            . "<a href=\"$cgi_script_name?check=" . uri_encode($hostname) . '">&#8623;</a></td>' . "\n";
         $htmlpage .= "  <td class='hoop' rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>"
            . "<a href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">$hostname</a></td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_style = $config->{'remote-action'}{$act_name}{'style'} // ''; # style is optional
         $bold++ if $service =~ m/$act_regex/ and $act_style eq 'bold';
         }
      $htmlpage .= $bold ? '  <td class="hoop bold">' : '  <td class="hoop">';
      $htmlpage .= "$service</td>\n";

      $htmlpage .= "  <td class='hoop'>$status</td>\n";
      $htmlpage .= "  <td style='max-width:60%;'><small>$output";

      # The target is "all", a service name, a status level (any status shown
      # in the Level links, UNKNOWN included) or a host name.
      if (($recheck_target eq 'all')
            or ($recheck_target eq lc($service))
            or ($recheck_target eq lc($status))
            or ($recheck_target eq lc($hostname) and $status =~ m/^(CRITICAL|WARNING|PENDING)$/)
            ) {
         $now++; # every forced check is stamped one second after the previous one
         my $interval = $srv->next_check() - $srv->last_check() || 300;
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20)); # 5 * 60 = 300

         $htmlpage .= " -- <b>CHECK</b> [$now/" . ($future - $now) . "]";
         printf {$nagios_cmd} "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex  = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_status = $config->{'remote-action'}{$act_name}{'status'} || 'ALL';
         my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

         if ($service =~ m/$act_regex/ and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # check depend service otherwise
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   close $nagios_cmd;
   }

# host down
# Shown even when there is no service problem at all, otherwise a panel whose
# only alerts are hosts that are down would look empty.
if (%hostdown) {
   $htmlpage .= "<br />\n";
   $htmlpage .= "<table border='1'>\n";
   HOST_DOWN:
   for my $host (sort keys %hostdown) {
      my $host_stat = $hostdown{$host};
      my $hostname = $host_stat->host_name;
      my $downtime = downtime($host_stat->last_state_change);
      my $color = alertcolor('#F88888', $downtime);
      $htmlpage .= " <tr style='background:$color'>\n";
      $htmlpage .= "  <td><a class='hoop' href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">$hostname</a></td>\n";
      my @host_service;
      for my $srv ($log->list_services_on_host($host)) {
         push @host_service, $log->service($host, $srv)->service_description;
         }
      $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }
   $htmlpage .= "</table>\n";
   }

# remote action
if ($remote_flag) {
   require Nagios::Object::Config;
   my $parser = Nagios::Object::Config->new();
   $parser->parse("/var/cache/nagios3/objects.cache");

   $htmlpage .= "<div class='action'>\n";
   REMOTE_ACTION:
   for my $act_name (keys %remote_db) {
      my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

      # keep only the hosts whose depend service (SSH by default) is not itself failing
      my @action = grep !exists $remote_sshdown{$act_depend}->{$_}, @{$remote_db{$act_name}};
      if (@action) {
         my $srv_title = $config->{'remote-action'}{$act_name}{'title'} || "Action: $act_name";
         $htmlpage .= "<h2>$srv_title</h2>\n";
         $htmlpage .= "<pre>\n";
         my $remote_action = $config->{'remote-action'}{$act_name}{'command'};
         $remote_action = $config->{'remote-action'}{$act_name}{'command-one'}
            if @action == 1 and exists $config->{'remote-action'}{$act_name}{'command-one'};
         $remote_action = '' if not defined $remote_action; # action declared without command
         my @hosts;
         for my $host (@action) {
            my $object  = $parser->find_object("$host", "Nagios::Host");
            # fall back to the Nagios host name when the object cache has no entry for it
            my $address = $object ? $object->address : $host;
            push @hosts, hostmapping($address =~ s/\..*$//r); # short name, domain removed
            }
         my $hosts_list = join ' ', @hosts;
         # the command is displayed as text: escape it so that characters such
         # as & < > are shown as typed instead of being read as HTML
         $htmlpage .= ' ' . HTML::Entities::encode($remote_action =~ s{\%m}{$hosts_list}r);
         $htmlpage .= "</pre>\n";
         }
      }
   $htmlpage .= "</div>\n";
   }
291
# Page footer: project links, version, licence and copyright, then the end
# of the HTML document.
my $footer = <<"ENDH";
<hr clear="all">
<div class="footer">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
ENDH
$htmlpage .= $footer;

# The whole response (HTTP header included) is sent in a single print.
print {*STDOUT} $htmlpage;
309
# delayed future check
# The second batch of forced checks (the ones stamped with a later time) is
# written to the Nagios command file two seconds after the page was printed.
if (@futurecheck) {
   sleep 2;
   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   # report the real path: it is the first thing needed to diagnose a failure
   open my $nagios_cmd, '>>', $nagios_cmd_file or die "Can't open file $nagios_cmd_file: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   # buffered write errors only show up when the handle is closed
   close $nagios_cmd or warn "Can't close file $nagios_cmd_file: $!";
   }
318
319__END__
320
321
322=head1 NAME
323
324velvice.cgi - nagios velvice alert panel
325
326=head1 USAGE
327
328 velvice.cgi
329 velvice.cgi?check=XXX
330
331
332=head1 DESCRIPTION
333
334=begin html
335
336<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />
337
338=end html
339
340Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
341
The Nagios web page is sometimes visually cluttered
343and does not necessarily contain the information you need at a glance.
344For example, it is quite complicated to restart controls on multiple hosts in one click.
345
346For example, a server that is down should take only one line and not one per service...
347Similarly, a service that has been down for 5 minutes or since yesterday
has more weight than a service that has been down for 15 days.
349
350With Velvice Panel, a broken down server takes only one line.
Services that have been failing for a long time gradually lose their color and fade to pastel colors.
352
353With Velvice Panel, it is possible through a single click
354to redo a check of all services that are in the CRITICAL state.
Similarly, it is possible to restart a check on all failing SSH services...
356In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
357
358There is also a link to the web page of the main Nagios server.
359For each computer, you have a direct link to its dedicated web page on this server.
360
361
362=head1 CONFIGURATION FILE SPECIFICATION
363
364The configuration file must be F</etc/nagios3/velvice.yml>.
365This is not a required file.
366The file is in YAML format because this is a human-readable text file style.
367Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
368
369You can find in the software nagios-velvice an example of configuration:
370L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
371This one is in fact the master reference specification!
372
373The main keys C<nagios-server> and C<color-downtime> have good default values.
374No secondary key is required...
The Velvice script tries hard to replace C<~> with the right value automatically.
376
377 nagios-server:
378   status-file: /var/cache/nagios3/status.dat
379   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
380   portal-url:  ~/nagios3/
381   status-cgi:  ~/cgi-bin/nagios3/status.cgi
382   stylesheets: ~/nagios3/stylesheets
383
384The background color of the faulty service line display remains stable with a bright color for at least 3 days.
385Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
386
387 color-downtime:
388   day-min:  3
389   day-max: 50
390   factor:   0.7
391
392With key C<host-mapping>,
393it's good to map C<localhost> to the real name of the computer (hostname).
394
395 host-mapping:
396   localhost:  srv-nagios
397   toto:       titi
398
399The only important key is C<remote-action>.
You can add as many subkeys as you want.
401Let's take an example:
402
403 remote-action:
404   oom-killer:
405     regex: ^OOM Killer
406     title:  OOM Killer
407     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
408     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
409     depend: ^SSH
410     status: ALL
411     style: bold
412
413C<oom-killer> is just a key for your remote action.
414The regex is used to find which service has a problem...
The title is used in the resulting web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
416The C<command> is just written on this web page.
It is your responsibility to copy and paste it into a terminal.
418For security reasons, the nagios server does not have the right to launch the command on the remote host.
The wildcard C<%m> is replaced by the list of hosts (separated by spaces).
Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
421If your command is based on SSH,
422you can have an SSH action only if the remote SSH is running.
423So you can make the remote action depend on the SSH service through a regular expression of your choice.
424
425The last two keys.
426The C<status> key is for CRITICAL or WARNING (or ALL).
427The key C<style> is there to mark in bold the service in error on the web page.
428
429=head1 SEE ALSO
430
431yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
432
433In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
434
435=over
436
437=item * C<yamllint> - Linter for YAML files (Python)
438
439=item * C<libyaml-shell-perl> - YAML test shell (Perl)
440
441=back
442
443
Project resources:
445
446=over
447
448=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
449
450=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
451
452=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
453
454=back
455
456
457=head1 VERSION
458
459$Id: velvice.cgi 280 2018-07-15 11:55:46Z g7moreau $
460
461
462=head1 AUTHOR
463
464Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
465
466
467=head1 LICENSE AND COPYRIGHT
468
469Licence GNU GPL version 2 or later and Perl equivalent
470
471Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.