source: trunk/nagios-velvice/velvice.cgi @ 287

Last change on this file since 287 was 287, checked in by g7moreau, 6 years ago

Continue replacing check with red arrow

  • Property svn:keywords set to Id
File size: 17.4 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.8.6');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# --- Request parameters ----------------------------------------------------
# Assigning param() to a scalar forces scalar context: CGI.pm warns when
# param() is called in list context (as it is inside an argument list).
# The "check" parameter is optional, so fall back to the empty string to
# keep the later regex / eq tests free of "uninitialized value" warnings.
my $query           = CGI->new();
my $raw_check       = $query->param('check');
my $cgi_check       = defined $raw_check ? uri_decode($raw_check) : '';
$cgi_check          = '' if not defined $cgi_check;
my $cgi_script_name = $query->script_name();
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r; # script URL without the trailing "/xxx.cgi"
undef $query;

# --- Configuration ---------------------------------------------------------
# The YAML file is optional; every key below gets a default when missing.
my $config_file = '/etc/nagios3/velvice.yml';
my $config = {};
$config = YAML::Syck::LoadFile($config_file) if -e $config_file;
$config = {} if ref $config ne 'HASH'; # empty file or non-mapping document
$config->{'nagios-server'}                ||= {};
$config->{'nagios-server'}{'status-file'} ||= '/var/cache/nagios3/status.dat';
$config->{'nagios-server'}{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$config->{'nagios-server'}{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$config->{'nagios-server'}{'status-cgi'}  ||= "$cgi_path/status.cgi";
$config->{'nagios-server'}{'stylesheets'} ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/stylesheets}r;
$config->{'nagios-server'}{'image'}       ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/image}r;
$config->{'host-mapping'}                 ||= {};
$config->{'color-downtime'}               ||= {};
# day-min and factor may legitimately be 0, so only replace them when they
# are undefined. day-max is used as a divisor in alertcolor(): a zero value
# must still fall back to the default.
$config->{'color-downtime'}{'day-min'}    //=  3;
$config->{'color-downtime'}{'day-max'}    ||= 50;
$config->{'color-downtime'}{'factor'}     //=  0.7;
$config->{'remote-action'}                ||= {};

# --- Nagios status ---------------------------------------------------------
my $log = Nagios::StatusLog->new(
   Filename => $config->{'nagios-server'}{'status-file'},
   Version  => 3.0
   );
52
# Translate a host name through the "host-mapping" section of the
# configuration. A host without an entry in that section is returned
# unchanged.
sub hostmapping {
   my ($host) = @_;

   my $mapping = $config->{'host-mapping'};
   return $mapping->{$host} if exists $mapping->{$host};
   return $host;
   }
58
# Return how long ago $time_change (epoch seconds) happened, expressed in
# days and formatted with one decimal.
#
# The value is printed with a "days" suffix and compared with the
# day-min / day-max settings, so the divisor is the number of seconds in
# one day (24 * 3600).
sub downtime {
   my ($time_change) = @_;

   my $seconds_per_day = 24 * 3600;
   my $now = time;
   return sprintf '%.1f', ($now - $time_change) / $seconds_per_day;
   }
65
# Lighten $color according to the age of the alert.
#
#   $color    - base color given to Color::Calc (callers pass '#RRGGBB')
#   $downtime - age of the alert, as returned by downtime()
#
# The age is shifted by day-min, clamped to the range [0, day-max], and
# scaled so that an age of day-max yields the configured factor.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $setup = $config->{'color-downtime'};

   my $age = $downtime - $setup->{'day-min'};      # same color during the first days
   if ($age > $setup->{'day-max'}) {
      $age = $setup->{'day-max'};                  # older alerts do not get any lighter
      }
   if ($age < 0) {
      $age = 0;
      }

   my $lightness = ($age * $setup->{'factor'}) / $setup->{'day-max'};
   return Color::Calc::color_light_html($color, $lightness);
   }
76
# Return a copy of $str in which every single whitespace character is
# replaced by the HTML entity "&nbsp;".
sub nosbreak {
   my ($str) = @_;

   # Limit -1 keeps trailing empty fields, so leading, trailing and
   # consecutive whitespace characters each produce one entity.
   my @pieces = split /\s/, $str, -1;
   return join '&nbsp;', @pieces;
   }
82
# Walk through every host of the status log and sort the findings into:
#   %hostdown        - hosts reported DOWN with no service OK or PENDING
#   @serviceproblems - every service whose status is not OK
#   %hostcount       - number of such services per host (used as rowspan)
my (%hostdown, %hostcount);
my (@serviceproblems, @futurecheck);

HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);

   if ($host_stat->status eq 'DOWN') {
      # A DOWN host still having one service OK or PENDING is not
      # collapsed into a single line: its services are listed below.
      my $answering = 0;
      SERVICE:
      for my $srv ($log->list_services_on_host($host)) {
         my $srv_status = $log->service($host, $srv)->status;
         if ($srv_status eq 'OK' or $srv_status eq 'PENDING') {
            $answering = 1;
            last SERVICE;
            }
         }

      if (not $answering) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   for my $srv ($log->list_services_on_host($host)) {
      next if $log->service($host, $srv)->status eq 'OK';

      push @serviceproblems, $log->service($host, $srv);
      $hostcount{$host}++;
      }
   }
107
# Timestamp shown in the header (it is also the "refresh" link).
# $now is reused further down as the base time of the forced checks.
my $now = time;
my ($min, $hour, $mday, $mon, $year) = (localtime $now)[1 .. 5];
my $date = sprintf '%04i-%02i-%02i %02i:%02i', $year + 1900, $mon + 1, $mday, $hour, $min;

# HTTP header, HTML head and page banner.
my $htmlpage = <<"ENDH";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet"    type="text/css"  href="$config->{'nagios-server'}{'stylesheets'}/velvice.css">
 <link rel="shortcut icon" type="image/ico" href="$config->{'nagios-server'}{'image'}/favicon.ico">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$config->{'nagios-server'}{'portal-url'}">Core Server</a></li>
    <li><small><a id="refresh" href="$cgi_script_name">$date</a></small></li>
  </ul>
 </h1>
</div>
ENDH
135
# Count the problems per service description and per status; the totals
# are displayed next to the recheck buttons.
my (%service_name, %service_status);
for my $problem (@serviceproblems) {
   my $description = $problem->service_description;
   my $level       = $problem->status;
   $service_name{$description}++;
   $service_status{$level}++;
   }
142
# Body of the alert panel.
#
# When at least one service is in trouble, this section appends to $htmlpage:
#   - the recheck buttons (ALL, one per status level, one per service name);
#   - one table row per faulty service; a forced check is written to the
#     Nagios command file for each row selected by the "check" parameter;
#   - the table of hosts that are down;
#   - the remote action commands suggested by the configuration.
if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {

   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      " <span class='button'><a href='$cgi_script_name?check=all'>ALL</a><small>" . scalar(@serviceproblems) . '</small></span>',
      map(" <span class='button'><a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>$_</a><small>$service_status{$_}</small></span>",
         sort keys %service_status);
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n",
      map(" <span class='button'><a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>" . nosbreak($_) . "</a><small>$service_name{$_}</small></span>",
         sort keys %service_name);
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $nagios_cmd_file
      or die "Can't open nagios command file $nagios_cmd_file: $!";

   # The "check" parameter is optional: use an empty string when it is
   # absent so that the tests below do not warn about undefined values.
   my $check = defined $cgi_check ? $cgi_check : '';

   my %remote_sshdown = (); # depend regex => { host => count } of hosts whose depend service is faulty
   my %remote_db      = (); # action name  => [ hosts concerned by the action ]
   my $remote_flag;         # true as soon as one remote action is recorded

   my $current_host  = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # drop the leading "STATUS:" / "STATUS -" prefix of the plugin output
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      my $color = $status eq 'CRITICAL' ? '#F88888' : '#FFFF00';
      $color = alertcolor($color, $downtime);
      my $stylecolor = "style='background:$color;'";
      $htmlpage .= " <tr>\n";
      if ($hostname ne $current_host) {
         # first row of a host: the host cells span all of its service rows
         $current_host = $hostname;
         $htmlpage .= "  <td $stylecolor rowspan='$hostcount{$hostname}'>"
            . "<a href=\"$cgi_script_name?check=" . uri_encode($hostname) . '">&#8623;</a></td>' . "\n";
         $htmlpage .= "  <td $stylecolor class='hoop' rowspan='$hostcount{$hostname}'>"
            . "<a href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">$hostname</a></td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_style = $config->{'remote-action'}{$act_name}{'style'};
         next ACTION_STYLE if not defined $act_regex or not defined $act_style;
         $bold++ if $service =~ m/$act_regex/ and $act_style eq 'bold';
         }
      $htmlpage .= $bold ? "  <td $stylecolor class='hoop bold'>" : "  <td $stylecolor class='hoop'>";
      $htmlpage .= "$service</td>\n";

      $htmlpage .= "  <td $stylecolor class='hoop'>$status</td>\n";
      $htmlpage .= "  <td $stylecolor class='comment'>$output</td>\n";
      $htmlpage .= "  <td $stylecolor class='days'>$downtime days</td>\n";

      # \Q...\E: the service name is data, not a pattern; without quoting,
      # names containing regex metacharacters would not match themselves.
      if (($check =~ m/all/i)
            or ($check =~ m/^\Q$service\E$/i)
            or ($check =~ m/critical/i and $status eq 'CRITICAL')
            or ($check =~ m/warning/i  and $status eq 'WARNING')
            or ($check =~ m/pending/i  and $status eq 'PENDING')
            or ($check eq $hostname    and $status =~ m/^(CRITICAL|WARNING|PENDING)$/)
            ) {
         $now++; # one more second for every scheduled check
         my $interval = $srv->next_check() - $srv->last_check() || 300;
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20)); # 5 * 60 = 300

         $htmlpage .= "  <td class='checking'>" . ($future - $now) . "</td>\n";
         printf {$nagios_cmd} "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex  = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_status = $config->{'remote-action'}{$act_name}{'status'} || 'ALL';
         my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

         if (defined $act_regex
               and $service =~ m/$act_regex/
               and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # check depend service otherwise
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   close $nagios_cmd;

   # host down
   if (%hostdown) {
      $htmlpage .= "<br />\n";
      $htmlpage .= "<table border='1'>\n";
      HOST_DOWN:
      for my $host (sort keys %hostdown) {
         my $host_stat = $hostdown{$host};
         my $hostname = $host_stat->host_name;
         my $downtime = downtime($host_stat->last_state_change);
         my $color = alertcolor('#F88888', $downtime);
         $htmlpage .= " <tr style='background:$color'>\n";
         $htmlpage .= "  <td><a class='hoop' href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">$hostname</a></td>\n";
         my @host_service;
         for my $srv ($log->list_services_on_host($host)) {
            push @host_service, $log->service($host, $srv)->service_description;
            }
         $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
         $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
         $htmlpage .= " </tr>\n";
         }
      $htmlpage .= "</table>\n";
      }

   # remote action
   if ($remote_flag) {
      require Nagios::Object::Config;
      my $parser = Nagios::Object::Config->new();
      $parser->parse("/var/cache/nagios3/objects.cache");

      $htmlpage .= "<div class='action'>\n";
      REMOTE_ACTION:
      for my $act_name (keys %remote_db) {
         my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

         # keep only the hosts whose depend service is not itself in trouble
         my @action = grep !exists $remote_sshdown{$act_depend}->{$_}, @{$remote_db{$act_name}};
         if (@action) {
            my $srv_title = $config->{'remote-action'}{$act_name}{'title'} || "Action: $act_name";
            $htmlpage .= "<h2>$srv_title</h2>\n";
            $htmlpage .= "<pre>\n";
            my $remote_action = $config->{'remote-action'}{$act_name}{'command'};
            $remote_action = $config->{'remote-action'}{$act_name}{'command-one'}
               if @action == 1 and exists $config->{'remote-action'}{$act_name}{'command-one'};
            my @hosts;
            for my $host (@action) {
               my $object = $parser->find_object("$host", "Nagios::Host");
               next if not $object; # host missing from the object cache: nothing to map
               # short name: address without its domain part, then host-mapping
               push @hosts, hostmapping($object->address =~ s/\..*$//r);
               }
            my $hosts_list = join ' ', @hosts;
            $htmlpage .= ' ' . $remote_action =~ s{\%m}{$hosts_list}r;
            $htmlpage .= "</pre>\n";
            }
         }
      $htmlpage .= "</div>\n";
      }
   }
302
# Page footer, then send the whole page to the client.
$htmlpage .= <<"ENDH";
<hr clear="all">
<div class="footer">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
ENDH

print $htmlpage;

# delayed future check
# Every immediate forced check written earlier has a second one queued in
# @futurecheck; they are written to the Nagios command file after a short
# pause, once the page has been printed.
if (@futurecheck) {
   sleep 2;
   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $nagios_cmd_file
      or die "Can't open nagios command file $nagios_cmd_file: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   close $nagios_cmd;
   }
329
330__END__
331
332
333=head1 NAME
334
335velvice.cgi - nagios velvice alert panel
336
337=head1 USAGE
338
339 velvice.cgi
340 velvice.cgi?check=XXX
341
342
343=head1 DESCRIPTION
344
345=begin html
346
347<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />
348
349=end html
350
351Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
352
353The Nagios web page is sometimes very graphically charged
354and does not necessarily contain the information you need at a glance.
355For example, it is quite complicated to restart controls on multiple hosts in one click.
356
357For example, a server that is down should take only one line and not one per service...
358Similarly, a service that has been down for 5 minutes or since yesterday
359has more weight than a service that has fallen for 15 days.
360
361With Velvice Panel, a broken down server takes only one line.
362Services that have been falling for a long time gradually lose their color and become pastel colors.
363
364With Velvice Panel, it is possible through a single click
365to redo a check of all services that are in the CRITICAL state.
366Similarly, it is possible to restart a check on all SSH services in breakdowns ...
367In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
368
369There is also a link to the web page of the main Nagios server.
370For each computer, you have a direct link to its dedicated web page on this server.
371
372
373=head1 CONFIGURATION FILE SPECIFICATION
374
375The configuration file must be F</etc/nagios3/velvice.yml>.
376This is not a required file.
377The file is in YAML format because this is a human-readable text file style.
378Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
379
380You can find in the software nagios-velvice an example of configuration:
381L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
382This one is in fact the master reference specification!
383
384The main keys C<nagios-server> and C<color-downtime> have good default values.
385No secondary key is required...
The Velvice script tries hard to replace ~ with the right value automatically.
387
388 nagios-server:
389   status-file: /var/cache/nagios3/status.dat
390   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
391   portal-url:  ~/nagios3/
392   status-cgi:  ~/cgi-bin/nagios3/status.cgi
393   stylesheets: ~/nagios3/stylesheets
394
395The background color of the faulty service line display remains stable with a bright color for at least 3 days.
396Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
397
398 color-downtime:
399   day-min:  3
400   day-max: 50
401   factor:   0.7
402
403With key C<host-mapping>,
404it's good to map C<localhost> to the real name of the computer (hostname).
405
406 host-mapping:
407   localhost:  srv-nagios
408   toto:       titi
409
410The only important key is C<remote-action>.
411You can affiliate as many subkeys as you want.
412Let's take an example:
413
414 remote-action:
415   oom-killer:
416     regex: ^OOM Killer
417     title:  OOM Killer
418     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
419     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
420     depend: ^SSH
421     status: ALL
422     style: bold
423
424C<oom-killer> is just a key for your remote action.
425The regex is used to find which service has a problem...
The title is used in the result web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
427The C<command> is just written on this web page.
It is your responsibility to copy and paste it into a terminal.
429For security reasons, the nagios server does not have the right to launch the command on the remote host.
The wildcard C<%m> is replaced by the list of hosts (separated by spaces).
Sometimes, the command can be different if there is only one computer (just SSH and no parallel SSH).
432If your command is based on SSH,
433you can have an SSH action only if the remote SSH is running.
434So you can make the remote action depend on the SSH service through a regular expression of your choice.
435
436The last two keys.
437The C<status> key is for CRITICAL or WARNING (or ALL).
438The key C<style> is there to mark in bold the service in error on the web page.
439
440=head1 SEE ALSO
441
442yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
443
444In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
445
446=over
447
448=item * C<yamllint> - Linter for YAML files (Python)
449
450=item * C<libyaml-shell-perl> - YAML test shell (Perl)
451
452=back
453
454
Project's own resources:
456
457=over
458
459=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
460
461=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
462
463=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
464
465=back
466
467
468=head1 VERSION
469
470$Id: velvice.cgi 287 2018-07-16 17:47:01Z g7moreau $
471
472
473=head1 AUTHOR
474
475Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
476
477
478=head1 LICENSE AND COPYRIGHT
479
480Licence GNU GPL version 2 or later and Perl equivalent
481
482Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.