source: trunk/nagios-velvice/velvice.cgi @ 286

Last change on this file since 286 was 286, checked in by g7moreau, 6 years ago
  • Try to push force check with a red arrow
  • Property svn:keywords set to Id
File size: 17.4 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.8.5');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# --- CGI request -------------------------------------------------------------
# Only the 'check' parameter and the script location are needed, so the CGI
# object is released as soon as both have been read.
my $query = CGI->new();

# param() is forced into scalar context: in list context it returns every
# value of the parameter, or an empty list when the parameter is absent.
# $cgi_check is normalised to '' on a plain page load so that the later
# comparisons never see an undefined value.
my $cgi_check = scalar $query->param('check');
$cgi_check = uri_decode($cgi_check) if defined $cgi_check;
$cgi_check //= '';

my $cgi_script_name = $query->script_name();
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r; # script URL without the trailing /xxx.cgi
undef $query;

# --- Configuration -----------------------------------------------------------
# The YAML file is optional; every key missing from it gets a default below.
my $config_file = '/etc/nagios3/velvice.yml';
my $config      = {};
$config = YAML::Syck::LoadFile($config_file) if -e $config_file;
$config->{'nagios-server'}                ||= {};
$config->{'nagios-server'}{'status-file'} ||= '/var/cache/nagios3/status.dat';
$config->{'nagios-server'}{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$config->{'nagios-server'}{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$config->{'nagios-server'}{'status-cgi'}  ||= "$cgi_path/status.cgi";
$config->{'nagios-server'}{'stylesheets'} ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/stylesheets}r;
$config->{'nagios-server'}{'image'}       ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/image}r;
$config->{'host-mapping'}                 ||= {};
$config->{'color-downtime'}               ||= {};
# Defined-or for day-min and factor: an explicit 0 in the file is a valid
# setting and must not be replaced by the default.
$config->{'color-downtime'}{'day-min'}    //=  3;
# day-max is used as a divisor by alertcolor(), so 0 falls back to the default.
$config->{'color-downtime'}{'day-max'}    ||= 50;
$config->{'color-downtime'}{'factor'}     //=  0.7;
$config->{'remote-action'}                ||= {};

# Parsed view of the Nagios status file, used by the rest of the script.
my $log = Nagios::StatusLog->new(
   Filename => $config->{'nagios-server'}{'status-file'},
   Version  => 3.0
   );
52
# Translate a host name through the 'host-mapping' table of the
# configuration. A name without an entry in the table is returned unchanged.
sub hostmapping {
   my ($name) = @_;

   my $mapping = $config->{'host-mapping'};
   return $mapping->{$name} if exists $mapping->{$name};
   return $name;
   }
58
# Return the time elapsed since a state change, in days, formatted with one
# decimal (for example '3.5').
#
#   $time_change - epoch timestamp (seconds) of the last state change
#
# The result is printed with a "days" unit and is compared by alertcolor()
# with the day-min / day-max settings, so it is computed with 24 hours a day.
sub downtime {
   my ($time_change) = @_;

   my $seconds_per_day = 24 * 3600;
   return sprintf '%.1f', (time() - $time_change) / $seconds_per_day;
   }
65
# Lighten an alert color according to the age of the alert.
#
#   $color    - base HTML color (for example '#F88888')
#   $downtime - age of the alert, as returned by downtime()
#
# The age is first reduced by 'day-min' (the color stays untouched during the
# first days), then limited to the range 0 .. 'day-max'. The lightening factor
# given to Color::Calc grows linearly with that age up to 'factor'.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $setup   = $config->{'color-downtime'};
   my $day_max = $setup->{'day-max'};

   my $age = $downtime - $setup->{'day-min'};
   if ($age > $day_max) {
      $age = $day_max;
      }
   if ($age < 0) {
      $age = 0;
      }

   my $factor = ($age * $setup->{'factor'}) / $day_max;
   return Color::Calc::color_light_html($color, $factor);
   }
76
# Return a copy of the given text in which every whitespace character is
# replaced by the HTML entity &nbsp;, so that a label is never wrapped.
sub nosbreak {
   my $str = shift;

   my $unbreakable = $str =~ s/\s/\&nbsp;/gr;
   return $unbreakable;
   }
82
# Classification of the content of the Nagios status file:
#   %hostdown        - host name => host status object, for hosts that are DOWN
#                      and have no service in the OK or PENDING state
#   @serviceproblems - service status objects whose status is not OK,
#                      grouped host by host (hosts in sorted order)
#   %hostcount       - host name => number of its entries in @serviceproblems
#   @futurecheck     - delayed Nagios commands, filled later in the script
my %hostdown;
my @serviceproblems;
my %hostcount;
my @futurecheck;
HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);

   if ($host_stat->status eq 'DOWN') {
      # A DOWN host is listed as down only when none of its services is
      # OK or PENDING; otherwise its services are handled like any other.
      my $answering = 0;
      for my $srv ($log->list_services_on_host($host)) {
         my $state = $log->service($host, $srv)->status;
         next if $state ne 'OK' and $state ne 'PENDING';
         $answering = 1;
         last;
         }

      if (not $answering) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   for my $srv ($log->list_services_on_host($host)) {
      my $srv_stat = $log->service($host, $srv);
      next if $srv_stat->status eq 'OK';

      push @serviceproblems, $srv_stat;
      $hostcount{$host}++;
      }
   }
107
# Current time: $now is also the base timestamp of the Nagios commands written
# later, $date is the human readable form shown in the page header.
my $now = time;
my (undef, $min, $hour, $mday, $mon, $year) = localtime $now;
my $date = sprintf '%04i-%02i-%02i %02i:%02i', $year + 1900, $mon + 1, $mday, $hour, $min;

# The whole answer (HTTP header included) is accumulated in $htmlpage and
# printed in one go at the end of the script.
my $htmlpage = <<"END_HEADER";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet"    type="text/css"  href="$config->{'nagios-server'}{'stylesheets'}/velvice.css">
 <link rel="shortcut icon" type="image/ico" href="$config->{'nagios-server'}{'image'}/favicon.ico">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$config->{'nagios-server'}{'portal-url'}">Core Server</a></li>
    <li><small><a id="refresh" href="$cgi_script_name">$date</a></small></li>
  </ul>
 </h1>
</div>
END_HEADER
135
# Number of problems per service description and per status; these counters
# feed the recheck buttons at the top of the page.
my (%service_name, %service_status);
for my $problem (@serviceproblems) {
   $service_name{ $problem->service_description }++;
   $service_status{ $problem->status }++;
   }
142
# Remote-action bookkeeping, filled while the service table is built and read
# afterwards by the "remote action" section:
#   %remote_db      - action name => list of hosts on which the action applies
#   %remote_sshdown - depend regex => { host => count } for hosts having a
#                     problem service that matches the depend regex
#   $remote_flag    - true as soon as one action matched one problem service
my %remote_db      = ();
my %remote_sshdown = ();
my $remote_flag;

if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {
   # Recheck buttons: everything, one per status, one per service name.
   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      " <span class='button'><a href='$cgi_script_name?check=all'>ALL</a><small>" . scalar(@serviceproblems) . '</small></span>',
      map(" <span class='button'><a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>$_</a><small>$service_status{$_}</small></span>",
         sort keys %service_status);
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n",
      map(" <span class='button'><a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>" . nosbreak($_) . "</a><small>$service_name{$_}</small></span>",
         sort keys %service_name);
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $cmd_file or die "Can't open file $cmd_file: $!";

   # The buttons above put the lower-cased status or service name in the
   # 'check' parameter, so the request is compared in lower case. Whole
   # string comparisons are used: a service name is plain text, not a regular
   # expression, and a name that merely contains "all" (Firewall...) must not
   # trigger a recheck of everything.
   my $request = $cgi_check // '';
   my $check   = lc $request;

   my $current_host  = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # plugin output without its leading "STATUS:" / "STATUS -" prefix
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      my $color = $status eq 'CRITICAL' ? '#F88888' : '#FFFF00';
      $color = alertcolor($color, $downtime);
      my $stylecolor = "style='background:$color;'";
      $htmlpage .= " <tr>\n";
      if ($hostname ne $current_host) {
         # first line of a host: recheck arrow and host name span all its lines
         $current_host = $hostname;
         $htmlpage .= "  <td rowspan='$hostcount{$hostname}' $stylecolor>"
            . "<a href=\"$cgi_script_name?check=" . uri_encode($hostname) . '">&#8623;</a></td>' . "\n";
         $htmlpage .= "  <td class='hoop' rowspan='$hostcount{$hostname}' $stylecolor>"
            . "<a href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">$hostname</a></td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_style = $config->{'remote-action'}{$act_name}{'style'} // ''; # optional key
         $bold++ if $service =~ m/$act_regex/ and $act_style eq 'bold';
         }
      $htmlpage .= $bold ? "  <td class='hoop bold' $stylecolor>" : "  <td class='hoop' $stylecolor>";
      $htmlpage .= "$service</td>\n";

      $htmlpage .= "  <td class='hoop' $stylecolor>$status</td>\n";
      $htmlpage .= "  <td class='comment' $stylecolor>$output</td>\n";
      $htmlpage .= "  <td class='days";

      # Recheck wanted for: everything, this service name, this status level
      # (critical, warning, pending, unknown...) or this host.
      my $recheck = ($check eq 'all')
         || ($check eq lc($service))
         || ($check eq lc($status))
         || ($request eq $hostname && $status =~ m/^(?:CRITICAL|WARNING|PENDING)$/);

      if ($recheck) {
         $now++; # one distinct timestamp per command
         my $interval = $srv->next_check() - $srv->last_check() || 300;
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20)); # 5 * 60 = 300

         $htmlpage .= ' checking'; # add class checking
         printf $nagios_cmd "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # second check at a random later time, written at the end of the script
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex  = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_status = $config->{'remote-action'}{$act_name}{'status'} || 'ALL';
         my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

         if ($service =~ m/$act_regex/ and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # remember the hosts whose depend service is itself a problem
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= "' $stylecolor>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   close $nagios_cmd or warn "Can't close file $cmd_file: $!";
   }

# host down
# This table is built outside the branch above so that down hosts are listed
# even when no service problem remains.
if (%hostdown) {
   $htmlpage .= "<br />\n";
   $htmlpage .= "<table border='1'>\n";
   HOST_DOWN:
   for my $host (sort keys %hostdown) {
      my $host_stat = $hostdown{$host};
      my $hostname = $host_stat->host_name;
      my $downtime = downtime($host_stat->last_state_change);
      my $color = alertcolor('#F88888', $downtime);
      $htmlpage .= " <tr style='background:$color'>\n";
      $htmlpage .= "  <td><a class='hoop' href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">$hostname</a></td>\n";
      my @host_service;
      for my $srv ($log->list_services_on_host($host)) {
         push @host_service, $log->service($host, $srv)->service_description;
         }
      $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }
   $htmlpage .= "</table>\n";
   }

# remote action
if ($remote_flag) {
   require Nagios::Object::Config;
   my $parser = Nagios::Object::Config->new();
   $parser->parse("/var/cache/nagios3/objects.cache");

   $htmlpage .= "<div class='action'>\n";
   REMOTE_ACTION:
   for my $act_name (sort keys %remote_db) { # sorted: same order on every page load
      my $act_conf   = $config->{'remote-action'}{$act_name};
      my $act_depend = $act_conf->{'depend'} || 'SSH';

      # keep only the hosts whose depend service is not in trouble
      my @action = grep !exists $remote_sshdown{$act_depend}->{$_}, @{$remote_db{$act_name}};
      next REMOTE_ACTION if not @action;

      my $srv_title = $act_conf->{'title'} || "Action: $act_name";
      $htmlpage .= "<h2>$srv_title</h2>\n";
      $htmlpage .= "<pre>\n";

      my $remote_action = $act_conf->{'command'};
      $remote_action = $act_conf->{'command-one'}
         if @action == 1 and exists $act_conf->{'command-one'};
      $remote_action //= ''; # no command configured: empty line, no warning

      my @hosts;
      for my $host (@action) {
         # Fall back on the Nagios host name when the object cache has no
         # entry (or no address) for this host.
         my $object  = $parser->find_object("$host", "Nagios::Host");
         my $address = $object ? $object->address : undef;
         $address = $host if not defined $address or $address eq '';
         push @hosts, hostmapping($address =~ s/\..*$//r); # short name, domain removed
         }
      my $hosts_list = join ' ', @hosts;

      # The command is displayed to be copied by hand, so it is escaped to
      # show up literally whatever characters it contains (<, >, &...).
      $htmlpage .= ' ' . HTML::Entities::encode($remote_action =~ s{\%m}{$hosts_list}r);
      $htmlpage .= "</pre>\n";
      }
   $htmlpage .= "</div>\n";
   }
303
# Page footer, then the whole page is sent in one go.
$htmlpage .= <<"ENDH";
<hr clear="all">
<div class="footer">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
ENDH

{
   # Autoflush while printing: the page must not stay in Perl's output
   # buffer during the sleep below.
   local $| = 1;
   print $htmlpage;
   }

# delayed future check
# The second round of forced checks is written to the Nagios command file
# after a short pause, once the page has been printed.
if (@futurecheck) {
   sleep 2;
   my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $cmd_file or die "Can't open file $cmd_file: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   close $nagios_cmd or warn "Can't close file $cmd_file: $!";
   }
330
331__END__
332
333
334=head1 NAME
335
336velvice.cgi - nagios velvice alert panel
337
338=head1 USAGE
339
340 velvice.cgi
341 velvice.cgi?check=XXX
342
343
344=head1 DESCRIPTION
345
346=begin html
347
348<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />
349
350=end html
351
352Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
353
354The Nagios web page is sometimes very graphically charged
355and does not necessarily contain the information you need at a glance.
356For example, it is quite complicated to restart controls on multiple hosts in one click.
357
358For example, a server that is down should take only one line and not one per service...
359Similarly, a service that has been down for 5 minutes or since yesterday
360has more weight than a service that has fallen for 15 days.
361
362With Velvice Panel, a broken down server takes only one line.
Services that have been failing for a long time gradually lose their color and fade to pastel.
364
365With Velvice Panel, it is possible through a single click
366to redo a check of all services that are in the CRITICAL state.
367Similarly, it is possible to restart a check on all SSH services in breakdowns ...
368In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
369
370There is also a link to the web page of the main Nagios server.
371For each computer, you have a direct link to its dedicated web page on this server.
372
373
374=head1 CONFIGURATION FILE SPECIFICATION
375
376The configuration file must be F</etc/nagios3/velvice.yml>.
377This is not a required file.
378The file is in YAML format because this is a human-readable text file style.
379Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
380
381You can find in the software nagios-velvice an example of configuration:
382L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
383This one is in fact the master reference specification!
384
385The main keys C<nagios-server> and C<color-downtime> have good default values.
386No secondary key is required...
The Velvice script tries hard to replace ~ with the right value automatically.
388
389 nagios-server:
390   status-file: /var/cache/nagios3/status.dat
391   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
392   portal-url:  ~/nagios3/
393   status-cgi:  ~/cgi-bin/nagios3/status.cgi
394   stylesheets: ~/nagios3/stylesheets
395
396The background color of the faulty service line display remains stable with a bright color for at least 3 days.
397Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
398
399 color-downtime:
400   day-min:  3
401   day-max: 50
402   factor:   0.7
403
404With key C<host-mapping>,
405it's good to map C<localhost> to the real name of the computer (hostname).
406
407 host-mapping:
408   localhost:  srv-nagios
409   toto:       titi
410
411The only important key is C<remote-action>.
412You can affiliate as many subkeys as you want.
413Let's take an example:
414
415 remote-action:
416   oom-killer:
417     regex: ^OOM Killer
418     title:  OOM Killer
419     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
420     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
421     depend: ^SSH
422     status: ALL
423     style: bold
424
425C<oom-killer> is just a key for your remote action.
426The regex is used to find which service has a problem...
The title is used in the resulting web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
428The C<command> is just written on this web page.
It is your responsibility to copy and paste it into a terminal.
430For security reasons, the nagios server does not have the right to launch the command on the remote host.
The wildcard C<%m> is replaced by the list of hosts (separated by spaces).
Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
433If your command is based on SSH,
434you can have an SSH action only if the remote SSH is running.
435So you can make the remote action depend on the SSH service through a regular expression of your choice.
436
437The last two keys.
438The C<status> key is for CRITICAL or WARNING (or ALL).
439The key C<style> is there to mark in bold the service in error on the web page.
440
441=head1 SEE ALSO
442
443yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
444
445In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
446
447=over
448
449=item * C<yamllint> - Linter for YAML files (Python)
450
451=item * C<libyaml-shell-perl> - YAML test shell (Perl)
452
453=back
454
455
Project resources:
457
458=over
459
460=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
461
462=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
463
464=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
465
466=back
467
468
469=head1 VERSION
470
471$Id: velvice.cgi 286 2018-07-16 17:36:34Z g7moreau $
472
473
474=head1 AUTHOR
475
476Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
477
478
479=head1 LICENSE AND COPYRIGHT
480
481Licence GNU GPL version 2 or later and Perl equivalent
482
483Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.