source: trunk/nagios-velvice/velvice.cgi @ 283

Last change on this file since 283 was 283, checked in by g7moreau, 6 years ago
  • Create button
  • Property svn:keywords set to Id
File size: 17.2 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.8.2');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# Read the request once, keep only what the page needs, then drop the CGI object.
my $query = CGI->new();

# The 'check' parameter is optional.  param() is forced into scalar context
# (recent CGI.pm versions warn when it is called in list context) and a missing
# value becomes an empty string, so that a plain page view does not raise
# "uninitialized value" warnings here and in every later test on $cgi_check.
my $cgi_check       = uri_decode(scalar($query->param('check')) // '');
my $cgi_script_name = $query->script_name();

# URL path of the directory holding this script (trailing "/name.cgi" removed)
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r;
undef $query;
31
# Optional YAML configuration: every key read by the script gets a default below
my $config_file = '/etc/nagios3/velvice.yml';
my $config      = -e $config_file ? YAML::Syck::LoadFile($config_file) : {};

# Nagios server locations; the URL defaults are derived from this script's own path
my $server = $config->{'nagios-server'} ||= {};
$server->{'status-file'} ||= '/var/cache/nagios3/status.dat';
$server->{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$server->{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$server->{'status-cgi'}  ||= "$cgi_path/status.cgi";
$server->{'stylesheets'} ||= $server->{'portal-url'} =~ s{/?$}{/stylesheets}r;
$server->{'image'}       ||= $server->{'portal-url'} =~ s{/?$}{/image}r;

# Plain sections that only need to exist
$config->{'host-mapping'}  ||= {};
$config->{'remote-action'} ||= {};

# Settings used by alertcolor() to lighten the row color with the downtime
my %downtime_default = ('day-min' => 3, 'day-max' => 50, 'factor' => 0.7);
my $shade = $config->{'color-downtime'} ||= {};
$shade->{$_} ||= $downtime_default{$_} for keys %downtime_default;

# Parsed Nagios status file, queried by the rest of the script
my $log = Nagios::StatusLog->new(Filename => $server->{'status-file'}, Version => 3.0);
52
# Translate a host name through the 'host-mapping' configuration table.
# A name without an entry in the table is returned unchanged.
sub hostmapping {
   my ($name) = @_;

   my $mapping = $config->{'host-mapping'};
   return $name if not exists $mapping->{$name};
   return $mapping->{$name};
   }
58
# Age of a state, in days.
#
#   $time_change  time of the last state change, in epoch seconds
#
# Returns the time elapsed between $time_change and now, formatted with one
# decimal.  The value is printed with a "days" suffix and compared with the
# day-min / day-max settings, so the divisor is the number of seconds in one
# day (24 * 3600).  It used to be 60 * 3600, which under-reported every age
# by a factor of 2.5.
sub downtime {
   my ($time_change) = @_;

   my $seconds_per_day = 24 * 3600;
   return sprintf '%.1f', (time() - $time_change) / $seconds_per_day;
   }
65
# Lighten an alert color according to the age of the problem.
#
#   $color     base HTML color
#   $downtime  age of the problem, as returned by downtime()
#
# The age is first reduced by 'day-min' (so the color stays the same during the
# first days), then kept between 0 and 'day-max'.  The lightening factor grows
# linearly with that value, up to 'factor' once 'day-max' is reached.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $shade   = $config->{'color-downtime'};
   my $day_max = $shade->{'day-max'};

   my $age = $downtime - $shade->{'day-min'};
   $age = $day_max if $age > $day_max;
   $age = 0        if $age < 0;

   return Color::Calc::color_light_html($color, $age * $shade->{'factor'} / $day_max);
   }
76
my %hostdown;          # host name => host status object, for hosts reported on a single line
my @serviceproblems;   # status objects of every service that is not OK
my %hostcount;         # host name => number of entries pushed in @serviceproblems
my @futurecheck;       # check commands written after the page has been printed

# Sort each host either in %hostdown (host DOWN and none of its services OK or
# PENDING) or, service by service, in @serviceproblems.
HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);
   my @services  = map { $log->service($host, $_) } $log->list_services_on_host($host);

   if ($host_stat->status eq 'DOWN') {
      my $alive = grep { $_->status eq 'OK' or $_->status eq 'PENDING' } @services;
      if (not $alive) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   for my $srv_stat (@services) {
      next if $srv_stat->status eq 'OK';
      push @serviceproblems, $srv_stat;
      $hostcount{$host}++;
      }
   }
101
# Reference time of the page: shown in the header, and later used as the base
# timestamp of the forced checks.
my $now = time;
my ($min, $hour, $mday, $mon, $year) = (localtime $now)[1 .. 5];
my $date = sprintf '%04i-%02i-%02i %02i:%02i', $year + 1900, $mon + 1, $mday, $hour, $min;

# HTTP header, HTML head and title bar; the date is a link reloading the page
my $htmlpage = <<"ENDH";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet"    type="text/css"  href="$config->{'nagios-server'}{'stylesheets'}/velvice.css">
 <link rel="shortcut icon" type="image/ico" href="$config->{'nagios-server'}{'image'}/favicon.ico">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$config->{'nagios-server'}{'portal-url'}">Core Server</a></li>
    <li><small><a id="refresh" href="$cgi_script_name">$date</a></small></li>
  </ul>
 </h1>
</div>
ENDH
129
# Count the service problems per service description and per status;
# both tables feed the recheck buttons.
my (%service_name, %service_status);
foreach my $problem (@serviceproblems) {
   my $description = $problem->service_description;
   my $state       = $problem->status;
   $service_name{$description} += 1;
   $service_status{$state}     += 1;
   }
136
# Tell whether the "check" request value selects one service problem.
#
#   $check     raw value of the "check" request parameter (may be undef)
#   $hostname  host carrying the service
#   $service   service description
#   $status    service status
#
# Returns 1 when the value is "all", the service description or the status
# (whole strings, compared ignoring case), or the exact host name for a service
# in CRITICAL, WARNING or PENDING state; returns 0 otherwise.
# Whole-string comparison is deliberate: pattern matching made a value such as
# "firewall" count as "all", broke on service descriptions holding regular
# expression metacharacters, and left the button of any other status without
# effect.
sub check_requested {
   my ($check, $hostname, $service, $status) = @_;

   return 0 if not defined $check or $check eq '';

   my $wanted = lc $check;
   return 1 if $wanted eq 'all';
   return 1 if $wanted eq lc $service;
   return 1 if $wanted eq lc $status;
   return 1 if $check eq $hostname and $status =~ m/^(?:CRITICAL|WARNING|PENDING)$/;
   return 0;
   }

# Percent-encode a value for a query string, reserved characters included, so
# that '&', '#' or a quote in a name can break neither the URL nor the
# surrounding HTML attribute.
sub query_value {
   my ($text) = @_;

   return uri_encode($text, {encode_reserved => 1});
   }

# Filled while listing the service problems, read by the remote action section
my %remote_sshdown = ();   # depend regex => { host => number of matching faulty services }
my %remote_db      = ();   # action name  => [ hosts on which the action applies ]
my $remote_flag;           # true once at least one remote action applies

if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {
   # recheck buttons: one for everything, one per status, one per service
   my @level_buttons = (
      " <span class='button'><a href='$cgi_script_name?check=all'>ALL</a><small>" . scalar(@serviceproblems) . '</small></span>',
      );
   for my $level (sort keys %service_status) {
      push @level_buttons,
         " <span class='button'><a href='$cgi_script_name?check=" . lc(query_value($level)) . "'>"
         . HTML::Entities::encode($level) . "</a><small>$service_status{$level}</small></span>";
      }
   my @service_buttons;
   for my $name (sort keys %service_name) {
      push @service_buttons,
         " <span class='button'><a href='$cgi_script_name?check=" . lc(query_value($name)) . "'>"
         . HTML::Entities::encode($name) . "</a><small>$service_name{$name}</small></span>";
      }

   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n", @level_buttons;
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n", @service_buttons;
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   # Nagios external command file, receives the forced checks requested below
   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $nagios_cmd_file
      or die "Can't open file $nagios_cmd_file: $!";

   my $current_host = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      # names are data, not markup: escape them before they reach the page
      my $hostname_html = HTML::Entities::encode($hostname);
      my $service_html  = HTML::Entities::encode($service);

      my $color = $status eq 'CRITICAL' ? '#F88888' : '#FFFF00';
      $color = alertcolor($color, $downtime);
      $htmlpage .= " <tr style='background:$color;'>\n";

      # the host cells are written once, on the first row of each host
      if ($hostname ne $current_host) {
         $current_host = $hostname;
         $htmlpage .= "  <td rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>"
            . "<a href=\"$cgi_script_name?check=" . query_value($hostname) . '">&#8623;</a></td>' . "\n";
         $htmlpage .= "  <td class='hoop' rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>"
            . "<a href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . query_value($hostname) . "\">$hostname_html</a></td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $action    = $config->{'remote-action'}{$act_name};
         my $act_regex = $action->{'regex'};
         # an action without regex can not match anything
         next ACTION_STYLE if not defined $act_regex or $act_regex eq '';
         $bold++ if $service =~ m/$act_regex/ and ($action->{'style'} // '') eq 'bold';
         }
      $htmlpage .= $bold ? '  <td class="hoop bold">' : '  <td class="hoop">';
      $htmlpage .= "$service_html</td>\n";

      $htmlpage .= "  <td class='hoop'>$status</td>\n";
      $htmlpage .= "  <td style='max-width:60%;'><small>$output";

      if (check_requested($cgi_check, $hostname, $service, $status)) {
         $now++;   # one more second per forced check, so the commands are spread over time
         my $interval = $srv->next_check() - $srv->last_check() || 300;
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20)); # 5 * 60 = 300

         $htmlpage .= " -- <b>CHECK</b> [$now/" . ($future - $now) . "]";
         printf {$nagios_cmd} "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $action     = $config->{'remote-action'}{$act_name};
         my $act_regex  = $action->{'regex'};
         my $act_status = $action->{'status'} || 'ALL';
         my $act_depend = $action->{'depend'} || 'SSH';

         if (defined $act_regex and $act_regex ne ''
               and $service =~ m/$act_regex/
               and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # check depend service otherwise
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   close $nagios_cmd;
   }

# host down
# This table is built outside the branch above on purpose: a down host hides
# all its services from @serviceproblems, so down hosts may well be the only
# thing to report.
if (%hostdown) {
   $htmlpage .= "<br />\n";
   $htmlpage .= "<table border='1'>\n";
   HOST_DOWN:
   for my $host (sort keys %hostdown) {
      my $host_stat = $hostdown{$host};
      my $hostname  = $host_stat->host_name;
      my $downtime  = downtime($host_stat->last_state_change);
      my $color     = alertcolor('#F88888', $downtime);
      $htmlpage .= " <tr style='background:$color'>\n";
      $htmlpage .= "  <td><a class='hoop' href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . query_value($hostname) . "\">"
         . HTML::Entities::encode($hostname) . "</a></td>\n";
      my @host_service;
      for my $srv ($log->list_services_on_host($host)) {
         push @host_service, HTML::Entities::encode($log->service($host, $srv)->service_description);
         }
      $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }
   $htmlpage .= "</table>\n";
   }

# remote action
# The commands are only printed on the page, this script never runs them.
if ($remote_flag) {
   require Nagios::Object::Config;
   my $parser = Nagios::Object::Config->new();
   $parser->parse("/var/cache/nagios3/objects.cache");

   $htmlpage .= "<div class='action'>\n";
   REMOTE_ACTION:
   for my $act_name (keys %remote_db) {
      my $action     = $config->{'remote-action'}{$act_name};
      my $act_depend = $action->{'depend'} || 'SSH';

      # keep the hosts whose depend service (SSH by default) is not itself faulty
      my @action = grep !exists $remote_sshdown{$act_depend}->{$_}, @{$remote_db{$act_name}};
      next REMOTE_ACTION if not @action;

      my $srv_title = $action->{'title'} || "Action: $act_name";
      $htmlpage .= "<h2>$srv_title</h2>\n";
      $htmlpage .= "<pre>\n";
      my $remote_action = $action->{'command'};
      $remote_action = $action->{'command-one'}
         if @action == 1 and exists $action->{'command-one'};
      $remote_action //= '';

      my @hosts;
      for my $host (@action) {
         my $object = $parser->find_object("$host", "Nagios::Host");
         # short name taken from the host address; fall back on the Nagios
         # host name when the object is not found in the object cache
         my $short = defined $object ? $object->address =~ s/\..*$//r : $host;
         push @hosts, hostmapping($short);
         }
      my $hosts_list = join ' ', @hosts;
      # a shell command holds quotes, '<', '>' and '&': escape it for display
      $htmlpage .= ' ' . HTML::Entities::encode($remote_action =~ s{\%m}{$hosts_list}r);
      $htmlpage .= "</pre>\n";
      }
   $htmlpage .= "</div>\n";
   }
295
# Page footer: version, links to the project pages, licence
$htmlpage .= <<"ENDH";
<hr clear="all">
<div class="footer">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
ENDH

print $htmlpage;

# delayed future check
# The second batch of commands is written to the Nagios command file
# 2 seconds after the page has been printed.
if (@futurecheck) {
   sleep 2;
   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   # name the real file in the message, it used to say a literal "filename"
   open my $nagios_cmd, '>>', $nagios_cmd_file
      or die "Can't open file $nagios_cmd_file: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   # buffered write errors only show up when the handle is closed
   close $nagios_cmd or warn "Can't close file $nagios_cmd_file: $!";
   }
322
323__END__
324
325
326=head1 NAME
327
328velvice.cgi - nagios velvice alert panel
329
330=head1 USAGE
331
332 velvice.cgi
333 velvice.cgi?check=XXX
334
335
336=head1 DESCRIPTION
337
338=begin html
339
340<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />
341
342=end html
343
344Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
345
346The Nagios web page is sometimes very graphically charged
347and does not necessarily contain the information you need at a glance.
348For example, it is quite complicated to restart controls on multiple hosts in one click.
349
350For example, a server that is down should take only one line and not one per service...
351Similarly, a service that has been down for 5 minutes or since yesterday
352has more weight than a service that has been down for 15 days.
353
354With Velvice Panel, a broken down server takes only one line.
355Services that have been down for a long time gradually lose their color and turn pastel.
356
357With Velvice Panel, it is possible through a single click
358to redo a check of all services that are in the CRITICAL state.
359Similarly, it is possible to restart a check on all failing SSH services...
360In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
361
362There is also a link to the web page of the main Nagios server.
363For each computer, you have a direct link to its dedicated web page on this server.
364
365
366=head1 CONFIGURATION FILE SPECIFICATION
367
368The configuration file must be F</etc/nagios3/velvice.yml>.
369This is not a required file.
370The file is in YAML format because this is a human-readable text file style.
371Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
372
373You can find in the software nagios-velvice an example of configuration:
374L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
375This one is in fact the master reference specification!
376
377The main keys C<nagios-server> and C<color-downtime> have good default values.
378No secondary key is required...
379The Velvice script tries hard to replace ~ with the right value automatically.
380
381 nagios-server:
382   status-file: /var/cache/nagios3/status.dat
383   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
384   portal-url:  ~/nagios3/
385   status-cgi:  ~/cgi-bin/nagios3/status.cgi
386   stylesheets: ~/nagios3/stylesheets
387
388The background color of the faulty service line display remains stable with a bright color for at least 3 days.
389Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
390
391 color-downtime:
392   day-min:  3
393   day-max: 50
394   factor:   0.7
395
396With key C<host-mapping>,
397it's good to map C<localhost> to the real name of the computer (hostname).
398
399 host-mapping:
400   localhost:  srv-nagios
401   toto:       titi
402
403The only important key is C<remote-action>.
404You can affiliate as many subkeys as you want.
405Let's take an example:
406
407 remote-action:
408   oom-killer:
409     regex: ^OOM Killer
410     title:  OOM Killer
411     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
412     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
413     depend: ^SSH
414     status: ALL
415     style: bold
416
417C<oom-killer> is just a key for your remote action.
418The regex is used to find which service has a problem...
419The title is used in the result web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
420The C<command> is just written on this web page.
421It is your responsibility to copy and paste it into a terminal.
422For security reasons, the nagios server does not have the right to launch the command on the remote host.
423The wildcard C<%m> is replaced by the list of the host (separated by the space).
424Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
425If your command is based on SSH,
426you can have an SSH action only if the remote SSH is running.
427So you can make the remote action depend on the SSH service through a regular expression of your choice.
428
429The last two keys.
430The C<status> key is for CRITICAL or WARNING (or ALL).
431The key C<style> is there to mark in bold the service in error on the web page.
432
433=head1 SEE ALSO
434
435yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
436
437In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
438
439=over
440
441=item * C<yamllint> - Linter for YAML files (Python)
442
443=item * C<libyaml-shell-perl> - YAML test shell (Perl)
444
445=back
446
447
448Project resources:
449
450=over
451
452=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
453
454=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
455
456=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
457
458=back
459
460
461=head1 VERSION
462
463$Id: velvice.cgi 283 2018-07-15 14:49:27Z g7moreau $
464
465
466=head1 AUTHOR
467
468Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
469
470
471=head1 LICENSE AND COPYRIGHT
472
473Licence GNU GPL version 2 or later and Perl equivalent
474
475Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.