source: trunk/nagios-velvice/velvice.cgi @ 284

Last change on this file since 284 was 284, checked in by g7moreau, 6 years ago
  • No break text on button
  • Property svn:keywords set to Id
File size: 17.3 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.8.3');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# Read the request once, keep only what the script needs, then drop the
# CGI object.
my $query           = CGI->new();
# param() is forced into scalar context (in list context it can inject zero
# or several values into the argument list); a missing "check" parameter
# becomes the empty string so later comparisons never see undef.
my $cgi_check       = uri_decode(scalar $query->param('check')) // '';
my $cgi_script_name = $query->script_name();
# Directory part of the script URL (script name without its trailing /xxx.cgi).
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r;
undef $query;
31
# Load the optional site configuration, then give every key the script reads
# a default value so the rest of the code never has to test for presence.
my $config_file = '/etc/nagios3/velvice.yml';
my $config      = (-e $config_file) ? YAML::Syck::LoadFile($config_file) : {};

# Top-level sections must at least be (possibly empty) hashes.
for my $section ('nagios-server', 'host-mapping', 'color-downtime', 'remote-action') {
   $config->{$section} ||= {};
   }

# Nagios server: file locations and URLs derived from this script's own URL.
my $server_cfg = $config->{'nagios-server'};
$server_cfg->{'status-file'} ||= '/var/cache/nagios3/status.dat';
$server_cfg->{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$server_cfg->{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$server_cfg->{'status-cgi'}  ||= "$cgi_path/status.cgi";
$server_cfg->{'stylesheets'} ||= $server_cfg->{'portal-url'} =~ s{/?$}{/stylesheets}r;
$server_cfg->{'image'}       ||= $server_cfg->{'portal-url'} =~ s{/?$}{/image}r;

# Colour fading parameters (see alertcolor).
my %color_default = ('day-min' => 3, 'day-max' => 50, 'factor' => 0.7);
for my $key (keys %color_default) {
   $config->{'color-downtime'}{$key} ||= $color_default{$key};
   }

# Parsed Nagios status file: source of every host and service state below.
my $log = Nagios::StatusLog->new(
   Filename => $server_cfg->{'status-file'},
   Version  => 3.0
   );
52
# Translate a host name through the "host-mapping" configuration section.
# Returns the mapped name when an entry exists for $host, otherwise $host
# itself.
sub hostmapping {
   my ($host) = @_;

   my $mapping = $config->{'host-mapping'};
   return $host if not exists $mapping->{$host};
   return $mapping->{$host};
   }
58
# Age of a state change, in days.
#   $time_change : epoch timestamp of the last state change
# Returns the elapsed time between $time_change and now as a string with one
# decimal. The unit is days (24 * 3600 seconds): the value is displayed as
# "days" and compared with the day-min / day-max colour settings.
sub downtime {
   my ($time_change) = @_;

   my $now = time;
   return sprintf '%.1f', ($now - $time_change) / (24 * 3600);
   }
65
# Lighten an alert colour according to how long the problem has lasted.
#   $color    : base HTML colour
#   $downtime : age of the problem, as returned by downtime()
# The colour is unchanged during the first 'day-min' days; beyond that the
# lightening grows linearly and is capped at 'factor' once the (shifted) age
# reaches 'day-max'.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $fade = $config->{'color-downtime'};

   my $age = $downtime - $fade->{'day-min'};                  # grace period
   $age = $fade->{'day-max'} if $age > $fade->{'day-max'};    # upper cap
   $age = 0                  if $age < 0;                     # lower cap

   return Color::Calc::color_light_html($color, ($age * $fade->{'factor'}) / $fade->{'day-max'});
   }
76
# Make a label unbreakable in HTML.
#   $str : text to protect
# Returns a copy of $str in which every whitespace character is replaced by
# the non-breaking space entity (&nbsp;).
sub nosbreak {
   my ($str) = @_;

   return $str =~ s/\s/&nbsp;/gr;
   }
82
# Walk every host of the status log and sort its state into:
#   %hostdown        host name => host status object, for hosts that are DOWN
#                    and have no service in OK or PENDING state
#   @serviceproblems service status objects whose status is not OK
#   %hostcount       host name => number of entries pushed for that host
# @futurecheck is filled later with delayed Nagios commands.
my %hostdown;
my @serviceproblems;
my %hostcount;
my @futurecheck;
HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);
   my @services  = $log->list_services_on_host($host);

   if ($host_stat->status eq 'DOWN') {
      # A DOWN host that still has an OK or PENDING service is handled
      # service by service, like any other host.
      my $has_live_service = 0;
      for my $srv (@services) {
         my $state = $log->service($host, $srv)->status;
         if ($state eq 'OK' or $state eq 'PENDING') {
            $has_live_service = 1;
            last;
            }
         }

      if (not $has_live_service) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   for my $srv (@services) {
      my $service_stat = $log->service($host, $srv);
      next if $service_stat->status eq 'OK';

      push @serviceproblems, $service_stat;
      $hostcount{$host}++;
      }
   }
107
# Page timestamp (local time, minute resolution). $now is also the base
# timestamp of the Nagios commands issued further down.
my $now        = time;
my @local_time = localtime $now;
my $date       = sprintf '%04i-%02i-%02i %02i:%02i',
   $local_time[5] + 1900,   # year
   $local_time[4] + 1,      # month
   @local_time[3, 2, 1];    # day, hour, minute

# The whole response (HTTP header included) is accumulated in $htmlpage and
# printed in one go at the end of the script.
my $portal   = $config->{'nagios-server'};
my $htmlpage = <<"ENDH";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet"    type="text/css"  href="$portal->{'stylesheets'}/velvice.css">
 <link rel="shortcut icon" type="image/ico" href="$portal->{'image'}/favicon.ico">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$portal->{'portal-url'}">Core Server</a></li>
    <li><small><a id="refresh" href="$cgi_script_name">$date</a></small></li>
  </ul>
 </h1>
</div>
ENDH
135
# Count the problems per service description and per status level; these
# counters label the recheck buttons.
my %service_name   = ();
my %service_status = ();
for my $srv (@serviceproblems) {
   $service_name{$srv->service_description}++;
   $service_status{$srv->status}++;
   }

# Requested recheck target (?check=...). An absent parameter is treated as
# the empty string, which selects nothing. Levels and service names are
# compared case-insensitively and as whole strings: a substring or regex
# match would make a service called "Firewall" trigger "all", and would
# misbehave on names containing regex metacharacters.
my $check_raw = $cgi_check // '';
my $check_lc  = lc $check_raw;

# Characters escaped when a Nagios name is written into the page.
my $html_unsafe = q{<>&"'};

# Remote-action bookkeeping, filled while listing the service problems:
#   %remote_db      action name => list of hosts needing that action
#   %remote_sshdown depend regex => { host => count } where the dependency
#                   service itself is in trouble
#   $remote_flag    true as soon as one action matched
my %remote_sshdown = ();
my %remote_db      = ();
my $remote_flag;

if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {

   # Recheck buttons: one per status level, then one per service name.
   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      " <span class='button'><a href='$cgi_script_name?check=all'>ALL</a><small>" . scalar(@serviceproblems) . '</small></span>',
      map(" <span class='button'><a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>" . HTML::Entities::encode($_, $html_unsafe) . "</a><small>$service_status{$_}</small></span>",
         sort keys %service_status);
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n",
      map(" <span class='button'><a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>" . nosbreak(HTML::Entities::encode($_, $html_unsafe)) . "</a><small>$service_name{$_}</small></span>",
         sort keys %service_name);
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   # The Nagios command file is opened only when a check really has to be
   # sent, so that simply viewing the page never depends on it.
   my $nagios_cmd;
   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};

   my $current_host  = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # Plugin output without its leading "STATUS:" / "STATUS -" prefix.
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      my $hostname_html = HTML::Entities::encode($hostname, $html_unsafe);
      my $service_html  = HTML::Entities::encode($service,  $html_unsafe);

      my $color = $status eq 'CRITICAL' ? '#F88888' : '#FFFF00';
      $color = alertcolor($color, $downtime);
      $htmlpage .= " <tr style='background:$color;'>\n";
      if ($hostname ne $current_host) {
         # First row of a host: recheck link and host name span all its rows.
         $current_host = $hostname;
         $htmlpage .= "  <td rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>"
            . "<a href=\"$cgi_script_name?check=" . uri_encode($hostname) . '">&#8623;</a></td>' . "\n";
         $htmlpage .= "  <td class='hoop' rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>"
            . "<a href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">$hostname_html</a></td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_style = $config->{'remote-action'}{$act_name}{'style'} // '';   # style is optional
         $bold++ if $service =~ m/$act_regex/ and $act_style eq 'bold';
         }
      $htmlpage .= $bold ? '  <td class="hoop bold">' : '  <td class="hoop">';
      $htmlpage .= "$service_html</td>\n";

      $htmlpage .= "  <td class='hoop'>$status</td>\n";
      $htmlpage .= "  <td style='max-width:60%;'><small>$output";

      # Does the request select this service? By "all", by service name, by
      # status level (any level shown as a button), or by host name.
      if ($check_lc ne ''
            and ($check_lc eq 'all'
               or $check_lc eq lc($service)
               or $check_lc eq lc($status)
               or ($check_raw eq $hostname and $status =~ m/^(CRITICAL|WARNING|PENDING)$/)
               )
            ) {
         if (not defined $nagios_cmd) {
            open $nagios_cmd, '>>', $nagios_cmd_file or die "Can't open file $nagios_cmd_file: $!";
            }

         $now++;   # one second apart for each scheduled check
         my $interval = $srv->next_check() - $srv->last_check() || 300;
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20)); # 5 * 60 = 300

         $htmlpage .= " -- <b>CHECK</b> [$now/" . ($future - $now) . "]";
         printf {$nagios_cmd} "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex  = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_status = $config->{'remote-action'}{$act_name}{'status'} || 'ALL';
         my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

         if ($service =~ m/$act_regex/ and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # check depend service otherwise
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   if (defined $nagios_cmd) {
      close $nagios_cmd or warn "Can't close file $nagios_cmd_file: $!";
      }
   }

# host down
# Shown whether or not there are service problems: the services of a down
# host are not counted as service problems, so a down host may be the only
# thing to report.
if (%hostdown) {
   $htmlpage .= "<br />\n";
   $htmlpage .= "<table border='1'>\n";
   HOST_DOWN:
   for my $host (sort keys %hostdown) {
      my $host_stat = $hostdown{$host};
      my $hostname = $host_stat->host_name;
      my $downtime = downtime($host_stat->last_state_change);
      my $color = alertcolor('#F88888', $downtime);
      $htmlpage .= " <tr style='background:$color'>\n";
      $htmlpage .= "  <td><a class='hoop' href=\"$config->{'nagios-server'}{'status-cgi'}?host=" . uri_encode($hostname) . "\">"
         . HTML::Entities::encode($hostname, $html_unsafe) . "</a></td>\n";
      my @host_service;
      for my $srv ($log->list_services_on_host($host)) {
         push @host_service, HTML::Entities::encode($log->service($host, $srv)->service_description, $html_unsafe);
         }
      $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }
   $htmlpage .= "</table>\n";
   }

# remote action
# For each configured action, print the command to run on the hosts that
# need it, leaving out hosts whose dependency service (SSH by default) is
# itself failing.
if ($remote_flag) {
   require Nagios::Object::Config;
   my $parser = Nagios::Object::Config->new();
   $parser->parse("/var/cache/nagios3/objects.cache");

   $htmlpage .= "<div class='action'>\n";
   REMOTE_ACTION:
   for my $act_name (keys %remote_db) {
      my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

      my @action = grep !exists $remote_sshdown{$act_depend}->{$_}, @{$remote_db{$act_name}};
      if (@action) {
         my $srv_title = $config->{'remote-action'}{$act_name}{'title'} || "Action: $act_name";
         $htmlpage .= "<h2>$srv_title</h2>\n";
         $htmlpage .= "<pre>\n";
         my $remote_action = $config->{'remote-action'}{$act_name}{'command'};
         $remote_action = $config->{'remote-action'}{$act_name}{'command-one'}
            if @action == 1 and exists $config->{'remote-action'}{$act_name}{'command-one'};
         my @hosts;
         for my $host (@action) {
            my $object = $parser->find_object("$host", "Nagios::Host");
            # Fall back on the Nagios host name when the object cache does
            # not know the host, instead of calling a method on nothing.
            my $address = $object ? $object->address : $host;
            push @hosts, hostmapping($address =~ s/\..*$//r);   # short name, domain stripped
            }
         my $hosts_list = join ' ', @hosts;
         $htmlpage .= ' ' . $remote_action =~ s{\%m}{$hosts_list}r;
         $htmlpage .= "</pre>\n";
         }
      }
   $htmlpage .= "</div>\n";
   }
301
# Page footer.
$htmlpage .= <<"ENDH";
<hr clear="all">
<div class="footer">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
ENDH

# Send the page with autoflush enabled, so it leaves the output buffer
# before the delayed commands below make the script sleep.
{
   local $| = 1;
   print $htmlpage;
   }

# delayed future check
# Second round of forced checks, written to the Nagios command file two
# seconds after the immediate ones.
if (@futurecheck) {
   sleep 2;
   my $nagios_cmd_path = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $nagios_cmd_path or die "Can't open file $nagios_cmd_path: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   # Buffered write errors only show up at close time.
   close $nagios_cmd or warn "Can't close file $nagios_cmd_path: $!";
   }
328
329__END__
330
331
332=head1 NAME
333
334velvice.cgi - nagios velvice alert panel
335
336=head1 USAGE
337
338 velvice.cgi
339 velvice.cgi?check=XXX
340
341
342=head1 DESCRIPTION
343
344=begin html
345
346<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />
347
348=end html
349
350Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
351
352The Nagios web page is sometimes very graphically charged
353and does not necessarily contain the information you need at a glance.
354For example, it is quite complicated to restart controls on multiple hosts in one click.
355
356For example, a server that is down should take only one line and not one per service...
357Similarly, a service that has been down for 5 minutes or since yesterday
358has more weight than a service that has fallen for 15 days.
359
360With Velvice Panel, a broken down server takes only one line.
361Services that have been falling for a long time gradually lose their color and become pastel colors.
362
363With Velvice Panel, it is possible through a single click
364to redo a check of all services that are in the CRITICAL state.
365Similarly, it is possible to restart a check on all SSH services in breakdowns ...
366In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
367
368There is also a link to the web page of the main Nagios server.
369For each computer, you have a direct link to its dedicated web page on this server.
370
371
372=head1 CONFIGURATION FILE SPECIFICATION
373
374The configuration file must be F</etc/nagios3/velvice.yml>.
375This is not a required file.
376The file is in YAML format because this is a human-readable text file style.
377Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
378
379You can find in the software nagios-velvice an example of configuration:
380L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
381This one is in fact the master reference specification!
382
383The main keys C<nagios-server> and C<color-downtime> have good default values.
384No secondary key is required...
385The Velvice script tries hard to replace ~ with the right value automatically.
386
387 nagios-server:
388   status-file: /var/cache/nagios3/status.dat
389   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
390   portal-url:  ~/nagios3/
391   status-cgi:  ~/cgi-bin/nagios3/status.cgi
392   stylesheets: ~/nagios3/stylesheets
393
394The background color of the faulty service line display remains stable with a bright color for at least 3 days.
395Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
396
397 color-downtime:
398   day-min:  3
399   day-max: 50
400   factor:   0.7
401
402With key C<host-mapping>,
403it's good to map C<localhost> to the real name of the computer (hostname).
404
405 host-mapping:
406   localhost:  srv-nagios
407   toto:       titi
408
409The only important key is C<remote-action>.
410You can affiliate as many subkeys as you want.
411Let's take an example:
412
413 remote-action:
414   oom-killer:
415     regex: ^OOM Killer
416     title:  OOM Killer
417     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
418     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
419     depend: ^SSH
420     status: ALL
421     style: bold
422
423C<oom-killer> is just a key for your remote action.
424The regex is used to find which service has a problem...
425The title is used in the result web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
426The C<command> is just written on this web page.
427You have the responsibility to copy / paste it into a terminal.
428For security reasons, the nagios server does not have the right to launch the command on the remote host.
429The wildcard C<%m> is replaced by the list of hosts (separated by spaces).
430Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
431If your command is based on SSH,
432you can have an SSH action only if the remote SSH is running.
433So you can make the remote action depend on the SSH service through a regular expression of your choice.
434
435The last two keys.
436The C<status> key is for CRITICAL or WARNING (or ALL).
437The key C<style> is there to mark in bold the service in error on the web page.
438
439=head1 SEE ALSO
440
441yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
442
443In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
444
445=over
446
447=item * C<yamllint> - Linter for YAML files (Python)
448
449=item * C<libyaml-shell-perl> - YAML test shell (Perl)
450
451=back
452
453
454Own project resources:
455
456=over
457
458=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
459
460=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
461
462=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
463
464=back
465
466
467=head1 VERSION
468
469$Id: velvice.cgi 284 2018-07-15 16:01:21Z g7moreau $
470
471
472=head1 AUTHOR
473
474Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
475
476
477=head1 LICENSE AND COPYRIGHT
478
479Licence GNU GPL version 2 or later and Perl equivalent
480
481Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.