source: trunk/nagios-velvice/velvice.cgi @ 278

Last change on this file since 278 was 278, checked in by g7moreau, 6 years ago
  • Change update zone
  • Property svn:keywords set to Id
File size: 16.5 KB
Line 
1#!/usr/bin/env perl
2#
3# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
4# 2017/06/22 Gabriel Moreau - big update
5# 2018/06/25 Gabriel Moreau - make velvice generic
6#
7# velvice.cgi
8# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
9#
10# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
11# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
12#
13# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl
14
15use strict;
16use warnings;
17use version; our $VERSION = version->declare('0.7.11');
18
19use CGI;
20use HTML::Entities ();
21use Nagios::StatusLog;
22use URI::Encode qw(uri_encode uri_decode);
23use Color::Calc ();
24use YAML::Syck;
25
# Read everything needed from the CGI request, then release the CGI object.
my $query           = CGI->new();
# scalar() forces param() into scalar context: in list context a missing
# 'check' yields an empty argument list and a repeated 'check' would leak
# extra arguments into uri_decode(). The '' defaults keep $cgi_check defined
# so the later pattern matches never operate on undef.
my $cgi_check       = uri_decode(scalar($query->param('check')) // '') // '';
my $cgi_script_name = $query->script_name();
# Directory part of the script URL (script name without the trailing "/xxx.cgi").
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r;
undef $query;
31
# Load the optional YAML configuration file, then fill in a default for every
# key that is missing. Order matters: 'stylesheets' is derived from
# 'portal-url', which is itself derived from the CGI path.
my $config = {};
$config = YAML::Syck::LoadFile('/etc/nagios3/velvice.yml') if -e '/etc/nagios3/velvice.yml';
$config->{'nagios-server'}                ||= {};
$config->{'nagios-server'}{'status-file'} ||= '/var/cache/nagios3/status.dat';
$config->{'nagios-server'}{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$config->{'nagios-server'}{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$config->{'nagios-server'}{'status-cgi'}  ||= "$cgi_path/status.cgi";
$config->{'nagios-server'}{'stylesheets'} ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/stylesheets}r;
$config->{'host-mapping'}                 ||= {};
$config->{'color-downtime'}               ||= {};
# Defined-or for day-min and factor: an explicit 0 is a meaningful setting
# (fade from the first day / never fade) and must not be replaced by the default.
$config->{'color-downtime'}{'day-min'}    //=  3;
# day-max keeps ||= on purpose: it is used as a divisor in alertcolor(),
# so a configured 0 has to fall back to the default.
$config->{'color-downtime'}{'day-max'}    ||= 50;
$config->{'color-downtime'}{'factor'}     //=  0.7;
$config->{'remote-action'}                ||= {};

# Parse the Nagios status file; every host/service lookup below goes through $log.
my $log = Nagios::StatusLog->new(
   Filename => $config->{'nagios-server'}{'status-file'},
   Version  => 3.0
   );
51
# Translate a host name through the 'host-mapping' section of the configuration.
# Returns the mapped name when an entry exists for $host, otherwise $host unchanged.
sub hostmapping {
   my ($host) = @_;

   my $mapping = $config->{'host-mapping'};
   return $mapping->{$host} if exists $mapping->{$host};
   return $host;
   }
57
# Age of a state change, in days.
#   $time_change - epoch timestamp (seconds) of the last state change
# Returns the elapsed time between $time_change and now as a string with one
# decimal (sprintf '%.1f'). The value is displayed with the unit "days" and is
# compared with the day-min / day-max settings, so the divisor is the number
# of seconds in a day (24 * 3600), not 60 * 3600.
sub downtime {
   my ($time_change) = @_;

   my $seconds_per_day = 24 * 3600;
   my $now = time;
   return sprintf '%.1f', ($now - $time_change) / $seconds_per_day;
   }
64
# Compute the background color of an alert line from its age.
#   $color    - base HTML color of the alert
#   $downtime - age of the alert, as returned by downtime()
# The first 'day-min' days are ignored (same color during the first days),
# the remaining age is clamped to the range [0, 'day-max'], and the result is
# scaled by 'factor' / 'day-max' before being handed to
# Color::Calc::color_light_html together with the base color.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $fade = $config->{'color-downtime'};

   my $age = $downtime - $fade->{'day-min'};             # same color first days
   $age = $fade->{'day-max'} if $age > $fade->{'day-max'}; # upper bound for the color scale
   $age = 0                  if $age < 0;

   my $factor = ($age * $fade->{'factor'}) / $fade->{'day-max'};
   return Color::Calc::color_light_html($color, $factor);
   }
75
# Classification of the current Nagios state:
#   %hostdown        - host name => host status object, for hosts considered fully down
#   @serviceproblems - service status objects whose status is not 'OK'
#   %hostcount       - host name => number of entries pushed in @serviceproblems
#   @futurecheck     - external commands to send to Nagios after the page is printed
my %hostdown;
my @serviceproblems;
my %hostcount;
my @futurecheck;
HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);
   my @services  = $log->list_services_on_host($host);

   # A DOWN host is reported on a single line, unless at least one of its
   # services is still OK or PENDING (then its services are listed normally).
   if ($host_stat->status eq 'DOWN') {
      my $has_live_service = 0;
      for my $srv (@services) {
         my $state = $log->service($host, $srv)->status;
         if ($state eq 'OK' or $state eq 'PENDING') {
            $has_live_service = 1;
            last;
            }
         }

      if (not $has_live_service) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   # Collect every service of this host that is not OK.
   for my $srv (@services) {
      my $srv_stat = $log->service($host, $srv);
      next if $srv_stat->status eq 'OK';
      push @serviceproblems, $srv_stat;
      $hostcount{$host}++;
      }
   }
100
# Timestamp shown in the page header (local time, "YYYY-MM-DD HH:MM").
# $now is also the base time for the check commands scheduled further down.
my $now  = time;
my @tm   = localtime $now;   # (sec, min, hour, mday, mon, year, ...)
my $date = sprintf '%04i-%02i-%02i %02i:%02i', $tm[5] + 1900, $tm[4] + 1, $tm[3], $tm[2], $tm[1];

# The whole response (HTTP header included) is accumulated in $htmlpage
# and printed in one go at the end of the script.
my $htmlpage = <<"ENDH";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet" type="text/css" href="$config->{'nagios-server'}{'stylesheets'}/velvice.css">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$config->{'nagios-server'}{'portal-url'}">Core Server</a></li>
    <li><small><a id="refresh" href="$cgi_script_name">$date</a></small></li>
  </ul>
 </h1>
</div>
ENDH
127
# Count the service problems per service description and per status;
# both tables feed the "recheck" links at the top of the page.
my (%service_name, %service_status);
for my $problem (@serviceproblems) {
   my $description = $problem->service_description;
   my $state       = $problem->status;
   $service_name{$description} += 1;
   $service_status{$state}     += 1;
   }
134
# Requested recheck (?check=...), never undef so the matches below stay warning-free.
my $check_request = defined $cgi_check ? $cgi_check : '';

# Remote-action bookkeeping, filled while the service table is rendered:
#   %remote_db      - action name => list of hosts on which the action applies
#   %remote_sshdown - 'depend' regex => { host => count } for hosts whose
#                     matching service is itself in the problem list
#   $remote_flag    - true as soon as one remote action has been recorded
my %remote_sshdown = ();
my %remote_db      = ();
my $remote_flag;

if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {

   # Recheck links: ALL, one per status level, one per service description.
   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      " <a href='$cgi_script_name?check=all'>ALL</a><small>(" . scalar(@serviceproblems) . ')</small>',
      map(" <a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>$_</a>($service_status{$_})", sort keys %service_status);
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n", map(" <a href='$cgi_script_name?check=" . lc(uri_encode($_)) . "'>$_</a><small>($service_name{$_})</small>", sort keys %service_name);
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   # Nagios external command file: immediate forced checks are written here.
   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   my $nagios_cmd;
   open $nagios_cmd, '>>', $nagios_cmd_file or die "Can't open file $nagios_cmd_file: $!";

   my $current_host  = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # Escape the plugin output and drop its leading "STATUS:" / "STATUS -" prefix.
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      my $color = $status eq 'CRITICAL' ? '#F88888' : '#FFFF00';
      $color = alertcolor($color, $downtime);
      $htmlpage .= " <tr style='background:$color;'>\n";
      # The host cell is emitted once per host and spans all its problem rows.
      if ($hostname ne $current_host) {
         $current_host = $hostname;
         $htmlpage .= "  <td rowspan='$hostcount{$hostname}' style='vertical-align:middle;'><a href=\"$config->{'nagios-server'}{'status-cgi'}?host=$hostname\">$hostname</a></td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex = $config->{'remote-action'}{$act_name}{'regex'};
         $bold++ if $service =~ m/$act_regex/ and $config->{'remote-action'}{$act_name}{'style'} eq 'bold';
         }
      $htmlpage .= $bold ? '  <td class="bold">' : '  <td>';
      $htmlpage .= "$service</td>\n";

      $htmlpage .= "  <td>$status</td>\n";
      $htmlpage .= "  <td style='max-width:60%;'><small>$output";

      # The service name is matched literally (\Q...\E): a description holding
      # regex metacharacters ("[", "\", "+", ...) must neither change the
      # meaning of the pattern nor make it die at run time.
      # NOTE(review): the level keywords are matched unanchored, so a request
      # for a service whose name contains "all" (e.g. "Firewall") also selects
      # every service - confirm whether this is intended before anchoring.
      if (($check_request =~ m/all/i)
            or ($check_request =~ m/^\Q$service\E$/i)
            or ($check_request =~ m/critical/i and $status eq 'CRITICAL')
            or ($check_request =~ m/warning/i  and $status eq 'WARNING')
            or ($check_request =~ m/pending/i  and $status eq 'PENDING')
            ) {
         $now++; # one second more for each scheduled check
         my $interval = $srv->next_check() - $srv->last_check() || 300;
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20)); # 5 * 60 = 300

         $htmlpage .= " -- <b>CHECK</b> [$now/" . ($future - $now) . "]";
         printf $nagios_cmd "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu\n", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex  = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_status = $config->{'remote-action'}{$act_name}{'status'} || 'ALL';
         my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

         if ($service =~ m/$act_regex/ and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # check depend service otherwise
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";
   close $nagios_cmd;
   }

# host down
# Rendered outside the branch above: hosts that are completely down contribute
# nothing to @serviceproblems, so they must be listed even when there is no
# service alert at all.
if (%hostdown) {
   $htmlpage .= "<br />\n";
   $htmlpage .= "<table border='1'>\n";
   HOST_DOWN:
   for my $host (sort keys %hostdown) {
      my $host_stat = $hostdown{$host};
      my $hostname = $host_stat->host_name;
      my $downtime = downtime($host_stat->last_state_change);
      my $color = alertcolor('#F88888', $downtime);
      $htmlpage .= " <tr style='background:$color'>\n";
      $htmlpage .= "  <td><a href=\"$config->{'nagios-server'}{'status-cgi'}?host=$hostname\">$hostname</a></td>\n";
      my @host_service;
      for my $srv ($log->list_services_on_host($host)) {
         push @host_service, $log->service($host, $srv)->service_description;
         }
      $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }
   $htmlpage .= "</table>\n";
   }

# remote action
# The commands are only displayed on the page, never executed by this script.
if ($remote_flag) {
   require Nagios::Object::Config;
   my $parser = Nagios::Object::Config->new();
   $parser->parse("/var/cache/nagios3/objects.cache");

   $htmlpage .= "<div class='action'>\n";
   REMOTE_ACTION:
   for my $act_name (keys %remote_db) {
      my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

      # Keep only the hosts whose 'depend' service is not itself in trouble.
      my @action = grep !exists $remote_sshdown{$act_depend}->{$_}, @{$remote_db{$act_name}};
      if (@action) {
         my $srv_title = $config->{'remote-action'}{$act_name}{'title'} || "Action: $act_name";
         $htmlpage .= "<h2>$srv_title</h2>\n";
         $htmlpage .= "<pre>\n";
         my $remote_action = $config->{'remote-action'}{$act_name}{'command'};
         $remote_action = $config->{'remote-action'}{$act_name}{'command-one'}
            if @action == 1 and exists $config->{'remote-action'}{$act_name}{'command-one'};
         my @hosts;
         for my $host (@action) {
            my $object = $parser->find_object("$host", "Nagios::Host");
            # A host present in the status file but not found in the object
            # cache must not abort the whole page: fall back to its Nagios name.
            my $address = defined $object ? $object->address : $host;
            push @hosts, hostmapping($address =~ s/\..*$//r);
            }
         my $hosts_list = join ' ', @hosts;
         $htmlpage .= ' ' . $remote_action =~ s{\%m}{$hosts_list}r;
         $htmlpage .= "</pre>\n";
         }
      }
   $htmlpage .= "</div>\n";
   }
286
# Page footer, then emit the whole response at once.
$htmlpage .= <<"ENDH";
<hr clear="all">
<div class="footer">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
ENDH

print $htmlpage;

# delayed future check
# The second round of forced checks is written to the Nagios command file
# only after the page has been printed and a short pause.
if (@futurecheck) {
   sleep 2;
   my $nagios_cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   my $nagios_cmd;
   # Report the real path in the error message (it used to print the literal word "filename").
   open $nagios_cmd, '>>', $nagios_cmd_file or die "Can't open file $nagios_cmd_file: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   # Buffered write errors only surface at close; the page is already sent, so just log them.
   close $nagios_cmd or warn "Can't close file $nagios_cmd_file: $!";
   }
313
314__END__
315
316
317=head1 NAME
318
319velvice.cgi - nagios velvice alert panel
320
321=head1 USAGE
322
323 velvice.cgi
324 velvice.cgi?check=XXX
325
326
327=head1 DESCRIPTION
328
329=begin html
330
331<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />
332
333=end html
334
335Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".
336
337The Nagios web page is sometimes very graphically charged
338and does not necessarily contain the information you need at a glance.
339For example, it is quite complicated to restart controls on multiple hosts in one click.
340
341For example, a server that is down should take only one line and not one per service...
342Similarly, a service that has been down for 5 minutes or since yesterday
343has more weight than a service that has fallen for 15 days.
344
345With Velvice Panel, a broken down server takes only one line.
346Services that have been falling for a long time gradually lose their color and become pastel colors.
347
348With Velvice Panel, it is possible through a single click
349to redo a check of all services that are in the CRITICAL state.
350Similarly, it is possible to restart a check on all SSH services in breakdowns ...
351In order not to clog the Nagios server, checks are shifted by 2 seconds in time.
352
353There is also a link to the web page of the main Nagios server.
354For each computer, you have a direct link to its dedicated web page on this server.
355
356
357=head1 CONFIGURATION FILE SPECIFICATION
358
359The configuration file must be F</etc/nagios3/velvice.yml>.
360This is not a required file.
361The file is in YAML format because this is a human-readable text file style.
362Other formats could have been Plain XML, RDF, JSON... but they are much less readable.
363
364You can find in the software nagios-velvice an example of configuration:
365L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
366This one is in fact the master reference specification!
367
368The main keys C<nagios-server> and C<color-downtime> have good default values.
369No secondary key is required...
The Velvice script tries hard to replace ~ with the right value automatically.
371
372 nagios-server:
373   status-file: /var/cache/nagios3/status.dat
374   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
375   portal-url:  ~/nagios3/
376   status-cgi:  ~/cgi-bin/nagios3/status.cgi
377   stylesheets: ~/nagios3/stylesheets
378
379The background color of the faulty service line display remains stable with a bright color for at least 3 days.
380Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).
381
382 color-downtime:
383   day-min:  3
384   day-max: 50
385   factor:   0.7
386
387With key C<host-mapping>,
388it's good to map C<localhost> to the real name of the computer (hostname).
389
390 host-mapping:
391   localhost:  srv-nagios
392   toto:       titi
393
394The only important key is C<remote-action>.
395You can affiliate as many subkeys as you want.
396Let's take an example:
397
398 remote-action:
399   oom-killer:
400     regex: ^OOM Killer
401     title:  OOM Killer
402     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
403     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
404     depend: ^SSH
405     status: ALL
406     style: bold
407
408C<oom-killer> is just a key for your remote action.
409The regex is used to find which service has a problem...
The title is used in the result web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
411The C<command> is just written on this web page.
412You have the responsibility to copy / cut it on a terminal.
413For security reasons, the nagios server does not have the right to launch the command on the remote host.
414The wildcard C<%m> is replaced by the list of the host (separated by the space).
Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
416If your command is based on SSH,
417you can have an SSH action only if the remote SSH is running.
418So you can make the remote action depend on the SSH service through a regular expression of your choice.
419
420The last two keys.
421The C<status> key is for CRITICAL or WARNING (or ALL).
422The key C<style> is there to mark in bold the service in error on the web page.
423
424=head1 SEE ALSO
425
426yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc
427
428In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:
429
430=over
431
432=item * C<yamllint> - Linter for YAML files (Python)
433
434=item * C<libyaml-shell-perl> - YAML test shell (Perl)
435
436=back
437
438
Project resources:
440
441=over
442
443=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>
444
445=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>
446
447=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>
448
449=back
450
451
452=head1 VERSION
453
454$Id: velvice.cgi 278 2018-07-15 10:18:37Z g7moreau $
455
456
457=head1 AUTHOR
458
459Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France
460
461
462=head1 LICENSE AND COPYRIGHT
463
464Licence GNU GPL version 2 or later and Perl equivalent
465
466Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
Note: See TracBrowser for help on using the repository browser.