#!/usr/bin/env perl
#
# 2014/05/15 Gabriel Moreau <Gabriel.Moreau@univ-grenoble-alpes.fr>
# 2017/06/22 Gabriel Moreau - big update
# 2018/06/25 Gabriel Moreau - make velvice generic
#
# velvice.cgi
# Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
#
# Need NagiosStatus http://exchange.nagios.org/directory/Addons/APIs/Perl/NagiosStatus-2Epm/details
# Possible command http://old.nagios.org/developerinfo/externalcommands/commandlist.php
#
# apt-get install perl-modules libnagios-object-perl libhtml-parser-perl liburi-encode-perl libcolor-calc-perl libyaml-syck-perl

use strict;
use warnings;
use version; our $VERSION = version->declare('0.8.0');

use CGI;
use HTML::Entities ();
use Nagios::StatusLog;
use URI::Encode qw(uri_encode uri_decode);
use Color::Calc ();
use YAML::Syck;

# Read the request: the optional check= parameter selects what must be
# rechecked, and the script URL is reused for every link of the page.
my $query           = CGI->new();
# Scalar context on purpose: param() in list context returns an empty list
# when the parameter is absent (and CGI.pm warns about that usage).
my $cgi_check       = $query->param('check');
$cgi_check          = uri_decode($cgi_check) if defined $cgi_check;
$cgi_check        //= ''; # plain page view: nothing to recheck, and no undef in later matches
my $cgi_script_name = $query->script_name();
# Directory part of the script URL, used to guess the other Nagios URLs.
my $cgi_path        = $cgi_script_name =~ s{/[^/]+\.cgi$}{}r;
undef $query;

# Load the optional site configuration, then fill in every missing value
# with a default so the rest of the script never has to test for them.
my $config = {};
$config = YAML::Syck::LoadFile('/etc/nagios3/velvice.yml') if -e '/etc/nagios3/velvice.yml';
# An empty file or a non-mapping YAML document must not break the lookups below.
$config = {} if ref($config) ne 'HASH';
# Every top-level section is used as a hash: replace anything else (missing,
# null or scalar value) with an empty one.
for my $section (qw(nagios-server host-mapping color-downtime remote-action)) {
   $config->{$section} = {} if ref($config->{$section}) ne 'HASH';
   }
$config->{'nagios-server'}{'status-file'} ||= '/var/cache/nagios3/status.dat';
$config->{'nagios-server'}{'nagios-cmd'}  ||= '/var/lib/nagios3/rw/nagios.cmd';
$config->{'nagios-server'}{'portal-url'}  ||= $cgi_path =~ s{/cgi-bin/}{/}r . '/';
$config->{'nagios-server'}{'status-cgi'}  ||= "$cgi_path/status.cgi";
$config->{'nagios-server'}{'stylesheets'} ||= $config->{'nagios-server'}{'portal-url'} =~ s{/?$}{/stylesheets}r;
# Defined-or: 0 is a meaningful value for day-min (fade from the first day)
# and for factor (never fade), so only a missing value gets the default.
$config->{'color-downtime'}{'day-min'}    //=  3;
# day-max is used as a divisor in alertcolor(), so 0 must fall back to the default.
$config->{'color-downtime'}{'day-max'}    ||= 50;
$config->{'color-downtime'}{'factor'}     //=  0.7;

# Parse the Nagios status file (version 3 format) once for the whole request.
my $status_file = $config->{'nagios-server'}{'status-file'};
my $log = Nagios::StatusLog->new(Filename => $status_file, Version => 3.0);

# Translate a host name through the 'host-mapping' configuration section.
# Returns the mapped name when an entry exists for $host, $host itself otherwise.
sub hostmapping {
   my ($host) = @_;

   my $mapping = $config->{'host-mapping'};
   return $mapping->{$host} if exists $mapping->{$host};
   return $host;
   }

# Age of a state change, in days.
#   $time_change : epoch time (seconds) of the last state change
# Returns the elapsed time between $time_change and now as a string with one
# decimal (for example '2.5').  The value is displayed with the unit "days"
# and compared with the day-min / day-max settings, so the divisor has to be
# the number of seconds in one day.
sub downtime {
   my ($time_change) = @_;

   my $seconds_per_day = 24 * 3600;
   return sprintf '%.1f', (time - $time_change) / $seconds_per_day;
   }

# Lighten an alert colour according to the age of the alert.
#   $color    : base HTML colour (for example '#F88888')
#   $downtime : age of the alert, as returned by downtime()
# The age is first reduced by 'day-min' (the colour stays unchanged during
# the first days), then limited to the range 0 .. 'day-max'.  The lightening
# ratio given to Color::Calc grows linearly with that age, up to 'factor'.
sub alertcolor {
   my ($color, $downtime) = @_;

   my $fading = $config->{'color-downtime'};

   my $age = $downtime - $fading->{'day-min'};
   $age = $fading->{'day-max'} if $age > $fading->{'day-max'};
   $age = 0                    if $age < 0;

   return Color::Calc::color_light_html($color, $age * $fading->{'factor'} / $fading->{'day-max'});
   }

# Sort every host of the status file into one of two groups:
#   %hostdown        : hosts in DOWN state with no service in OK or PENDING state,
#   @serviceproblems : every service that is not OK on the other hosts
#                      (%hostcount keeps the number of such services per host).
# @futurecheck collects the delayed check commands queued later in the script.
my (%hostdown, %hostcount);
my (@serviceproblems, @futurecheck);
HOST:
for my $host (sort $log->list_hosts()) {
   my $host_stat = $log->host($host);

   if ($host_stat->status eq 'DOWN') {
      # A DOWN host that still has a service in OK or PENDING state is
      # handled service by service, like any other host.
      my $has_live_service = 0;
      SERVICE_ALIVE:
      for my $srv ($log->list_services_on_host($host)) {
         my $state = $log->service($host, $srv)->status;
         next SERVICE_ALIVE if $state ne 'OK' and $state ne 'PENDING';
         $has_live_service = 1;
         last SERVICE_ALIVE;
         }

      if (not $has_live_service) {
         $hostdown{$host} = $host_stat;
         next HOST;
         }
      }

   for my $srv ($log->list_services_on_host($host)) {
      next if $log->service($host, $srv)->status eq 'OK';
      push @serviceproblems, $log->service($host, $srv);
      $hostcount{$host}++;
      }
   }

# Time of the request: shown in the header as 'YYYY-MM-DD HH:MM' and used
# later as the base time of the forced checks.
my $now = time;
my @clock = localtime $now; # (sec, min, hour, mday, mon, year, ...)
my $date = sprintf '%04i-%02i-%02i %02i:%02i', $clock[5] + 1900, $clock[4] + 1, @clock[3, 2, 1];

# HTTP header and top of the HTML page; the date is a link that reloads the panel.
my $htmlpage = <<"ENDH";
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <title>Nagios  Velvice</title>
 <link rel="stylesheet" type="text/css" href="$config->{'nagios-server'}{'stylesheets'}/velvice.css">
</head>
<body>
<div class="header">
 <h1>
  <ul>
    <li>Nagios Velvice Alert Panel : <a href="$config->{'nagios-server'}{'portal-url'}">Core Server</a></li>
    <li><small><a id="refresh" href="$cgi_script_name">$date</a></small></li>
  </ul>
 </h1>
</div>
ENDH

# Count the faulty services per service description and per status;
# both tallies feed the recheck links at the top of the page.
my (%service_name, %service_status);
for my $problem (@serviceproblems) {
   my $description = $problem->service_description;
   my $level       = $problem->status;
   $service_name{$description}++;
   $service_status{$level}++;
   }

# Page body.  Three parts are appended to $htmlpage:
#   1. the recheck links and the table of services in trouble,
#   2. the table of hosts that are completely down,
#   3. the commands suggested by the 'remote-action' configuration.
# When the request carries a check= parameter, every matching service gets a
# forced check written to the Nagios external command file right away, and a
# second one queued in @futurecheck for a later time.
my $check = $cgi_check // ''; # a request without check= selects nothing

my %remote_sshdown = (); # depend regex => { host => count } of hosts whose depend service is faulty
my %remote_db      = (); # action name  => [ hosts having a faulty service matching the action ]
my $remote_flag;         # true as soon as one remote action has a candidate host

if (scalar @serviceproblems == 0) {
   $htmlpage .= "<p>No alert to recheck.</p>\n";
   }
else {
   # Link asking for a recheck of $target; both the URL and the label come
   # from Nagios data, so they are escaped before being put in the markup.
   my $check_link = sub {
      my ($target) = @_;
      return "<a href='$cgi_script_name?check=" . HTML::Entities::encode(lc(uri_encode($target))) . "'>"
         . HTML::Entities::encode($target) . '</a>';
      };

   $htmlpage .= "<p>Alert to recheck - Level:\n";
   $htmlpage .= join ",\n",
      " <a href='$cgi_script_name?check=all'>ALL</a><small>(" . scalar(@serviceproblems) . ')</small>',
      map(' ' . $check_link->($_) . "($service_status{$_})", sort keys %service_status);
   $htmlpage .= ".\n";
   $htmlpage .= " <br />\n";
   $htmlpage .= " Service:\n";
   $htmlpage .= join ",\n",
      map(' ' . $check_link->($_) . "<small>($service_name{$_})</small>", sort keys %service_name);
   $htmlpage .= ".\n";
   $htmlpage .= "</p>\n";

   # Commands for Nagios are collected first: the command file is opened only
   # when a check is really requested, so a plain page view does not need it.
   my @checknow;

   my $current_host  = '';
   $htmlpage .= "<table border=\"1\">\n";
   SERVICE_PROBLEMS:
   for my $srv (@serviceproblems) {
      my $hostname = $srv->host_name;
      my $service  = $srv->service_description;
      my $status   = $srv->status;
      my $downtime = downtime($srv->last_state_change);
      # Drop the leading status word ("DISK CRITICAL - ...") of the plugin output.
      my $output   = HTML::Entities::encode($srv->plugin_output) =~ s/^[A-Z_\s]+?[:-]//r;

      my $hostname_html = HTML::Entities::encode($hostname);
      my $hostname_uri  = HTML::Entities::encode(uri_encode($hostname));

      my $color = $status eq 'CRITICAL' ? '#F88888' : '#FFFF00';
      $color = alertcolor($color, $downtime);
      $htmlpage .= " <tr style='background:$color;'>\n";
      if ($hostname ne $current_host) {
         # First row of a host: the two host cells span all its faulty services.
         $current_host = $hostname;
         $htmlpage .= "  <td rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>"
            . "<a href=\"$cgi_script_name?check=$hostname_uri\">&#8623;</a></td>\n";
         $htmlpage .= "  <td rowspan='$hostcount{$hostname}' style='vertical-align:middle;'>"
            . "<a href=\"$config->{'nagios-server'}{'status-cgi'}?host=$hostname_uri\">$hostname_html</a></td>\n";
         }

      my $bold;
      ACTION_STYLE:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_style = $config->{'remote-action'}{$act_name}{'style'} // '';
         # An empty pattern would silently reuse the last successful regex.
         next ACTION_STYLE unless length($act_regex // '');
         $bold++ if $service =~ m/$act_regex/ and $act_style eq 'bold';
         }
      $htmlpage .= $bold ? '  <td class="bold">' : '  <td>';
      $htmlpage .= HTML::Entities::encode($service) . "</td>\n";

      $htmlpage .= '  <td>' . HTML::Entities::encode($status) . "</td>\n";
      $htmlpage .= "  <td style='max-width:60%;'><small>$output";

      # The service name is quoted: it is data, not a pattern.
      if (($check =~ m/all/i)
            or ($check =~ m/^\Q$service\E$/i)
            or ($check =~ m/critical/i and $status eq 'CRITICAL')
            or ($check =~ m/warning/i  and $status eq 'WARNING')
            or ($check =~ m/pending/i  and $status eq 'PENDING')
            or ($check eq $hostname    and $status =~ m/^(CRITICAL|WARNING|PENDING)$/)
            ) {
         $now++; # one second more for each check, so they are not all scheduled at the same time
         my $interval = $srv->next_check() - $srv->last_check() || 300;
         $interval =  240 if $interval <  240;
         $interval = 3000 if $interval > 3000;
         my $future = $now + 20 + int(rand($interval - 20)); # 5 * 60 = 300

         $htmlpage .= " -- <b>CHECK</b> [$now/" . ($future - $now) . "]";
         push @checknow, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $now, $hostname, $service, $now;
         # delay future command
         push @futurecheck, sprintf "[%lu] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%lu", $future, $hostname, $service, $future;
         }

      ACTION_PUSH_AND_DEPEND:
      for my $act_name (keys %{$config->{'remote-action'}}) {
         my $act_regex  = $config->{'remote-action'}{$act_name}{'regex'};
         my $act_status = $config->{'remote-action'}{$act_name}{'status'} || 'ALL';
         my $act_depend = $config->{'remote-action'}{$act_name}{'depend'} || 'SSH';

         if (length($act_regex // '') and $service =~ m/$act_regex/ and ($act_status eq 'ALL' or $status =~ m/$act_status/)) {
            $remote_db{$act_name} ||= [];
            push @{$remote_db{$act_name}}, $hostname;
            $remote_flag++;
            }

         # check depend service otherwise
         $remote_sshdown{$act_depend} ||= {};
         $remote_sshdown{$act_depend}->{$hostname}++ if $service =~ m/$act_depend/;
         }

      $htmlpage .= "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }

   $htmlpage .= "</table>\n";

   if (@checknow) {
      my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
      open my $nagios_cmd, '>>', $cmd_file or die "Can't open Nagios command file $cmd_file: $!";
      print {$nagios_cmd} "$_\n" for @checknow;
      close $nagios_cmd or warn "Can't close Nagios command file $cmd_file: $!";
      }
   }

# host down
# Outside the branch above: hosts that are completely down must be listed
# even when no service problem remains on the other hosts.
if (%hostdown) {
   $htmlpage .= "<br />\n";
   $htmlpage .= "<table border='1'>\n";
   HOST_DOWN:
   for my $host (sort keys %hostdown) {
      my $host_stat = $hostdown{$host};
      my $hostname = $host_stat->host_name;
      my $downtime = downtime($host_stat->last_state_change);
      my $color = alertcolor('#F88888', $downtime);
      $htmlpage .= " <tr style='background:$color'>\n";
      $htmlpage .= "  <td><a href=\"$config->{'nagios-server'}{'status-cgi'}?host="
         . HTML::Entities::encode(uri_encode($hostname)) . '">' . HTML::Entities::encode($hostname) . "</a></td>\n";
      my @host_service;
      for my $srv ($log->list_services_on_host($host)) {
         push @host_service, HTML::Entities::encode($log->service($host, $srv)->service_description);
         }
      $htmlpage .= "  <td><small>" . join(', ', @host_service) . "</small></td>\n";
      $htmlpage .= "  <td style='text-align:right;'>$downtime days</td>\n";
      $htmlpage .= " </tr>\n";
      }
   $htmlpage .= "</table>\n";
   }

# remote action
if ($remote_flag) {
   # Loaded on demand: the object cache is only needed to get host addresses.
   require Nagios::Object::Config;
   my $parser = Nagios::Object::Config->new();
   $parser->parse("/var/cache/nagios3/objects.cache");

   $htmlpage .= "<div class='action'>\n";
   REMOTE_ACTION:
   for my $act_name (sort keys %remote_db) { # sorted: same order on every reload
      my $act_config = $config->{'remote-action'}{$act_name};
      my $act_depend = $act_config->{'depend'} || 'SSH';

      # Each host once, and only hosts whose depend service is not faulty.
      my %seen;
      my @action = grep { not exists $remote_sshdown{$act_depend}->{$_} }
                   grep { not $seen{$_}++ } @{$remote_db{$act_name}};

      my @hosts;
      for my $host (@action) {
         my $object = $parser->find_object("$host", "Nagios::Host");
         next if not defined $object; # host missing from the object cache
         my $address = $object->address // $host;
         # Keep the short name of a FQDN, but leave an IPv4 address whole.
         $address =~ s/\..*$// unless $address =~ m/^\d+(?:\.\d+){3}$/;
         push @hosts, hostmapping($address);
         }
      next REMOTE_ACTION if not @hosts;

      my $remote_action = $act_config->{'command'};
      $remote_action = $act_config->{'command-one'}
         if @hosts == 1 and defined $act_config->{'command-one'};
      next REMOTE_ACTION if not defined $remote_action; # nothing to suggest

      my $srv_title  = $act_config->{'title'} || "Action: $act_name";
      my $hosts_list = join ' ', @hosts;
      $htmlpage .= "<h2>$srv_title</h2>\n";
      $htmlpage .= "<pre>\n";
      # Escaped so that shell characters such as & < > are displayed as typed.
      $htmlpage .= ' ' . HTML::Entities::encode($remote_action =~ s{\%m}{$hosts_list}r);
      $htmlpage .= "</pre>\n";
      }
   $htmlpage .= "</div>\n";
   }

# Bottom of the page: project links, version and licence.
my $footer = <<"ENDH";
<hr clear="all">
<div class="footer">
 <b><a href="http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice">Velvice</a>
   - version: $VERSION</b>
   (<a href="http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html">online manual</a>)
   - Written by Gabriel Moreau
 <ul>
  <li>Licence GNU GPL version 2 or later and Perl equivalent</li>
  <li>Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France</li>
 </ul>
</div>
</body>
</html>
ENDH

# The whole response (HTTP header included) is sent in one go.
$htmlpage .= $footer;
print $htmlpage;

# delayed future check
# The second round of forced checks is written to the Nagios command file a
# little after the first one.
if (@futurecheck) {
   # Setting $| flushes STDOUT at once: the page reaches the client before
   # the pause instead of staying in the output buffer.
   $| = 1;
   sleep 2;
   my $cmd_file = $config->{'nagios-server'}{'nagios-cmd'};
   open my $nagios_cmd, '>>', $cmd_file or die "Can't open Nagios command file $cmd_file: $!";
   print {$nagios_cmd} "$_\n" for @futurecheck;
   # Buffered write errors only show up when the handle is closed.
   close $nagios_cmd or warn "Can't close Nagios command file $cmd_file: $!";
   }

__END__


=head1 NAME

velvice.cgi - nagios velvice alert panel

=head1 USAGE

 velvice.cgi
 velvice.cgi?check=XXX


=head1 DESCRIPTION

=begin html

<img width="700" alt="Nagios Velvice Alert Panel" title="Nagios Velvice Alert Panel" style="float:right" src="velvice.png" />

=end html

Nagios VELVICE is an acronym for "Nagios leVEL serVICE status".

The Nagios web page is sometimes very graphically charged
and does not necessarily contain the information you need at a glance.
For example, it is quite complicated to restart controls on multiple hosts in one click.

For example, a server that is down should take only one line and not one per service...
Similarly, a service that has been down for 5 minutes or since yesterday
has more weight than a service that has been down for 15 days.

With Velvice Panel, a broken down server takes only one line.
Services that have been failing for a long time gradually lose their color and turn pastel.

With Velvice Panel, it is possible through a single click
to redo a check of all services that are in the CRITICAL state.
Similarly, it is possible to restart a check on all the SSH services that are failing...
In order not to clog the Nagios server, checks are shifted by 2 seconds in time.

There is also a link to the web page of the main Nagios server.
For each computer, you have a direct link to its dedicated web page on this server.


=head1 CONFIGURATION FILE SPECIFICATION

The configuration file must be F</etc/nagios3/velvice.yml>.
This is not a required file.
The file is in YAML format because this is a human-readable text file style.
Other formats could have been Plain XML, RDF, JSON... but they are much less readable.

You can find in the software nagios-velvice an example of configuration:
L<velvice.sample.yml|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.sample.yml>.
This one is in fact the master reference specification!

The main keys C<nagios-server> and C<color-downtime> have good default values.
No secondary key is required...
The Velvice script tries hard to replace ~ with the right value automatically.

 nagios-server:
   status-file: /var/cache/nagios3/status.dat
   nagios-cmd:  /var/lib/nagios3/rw/nagios.cmd
   portal-url:  ~/nagios3/
   status-cgi:  ~/cgi-bin/nagios3/status.cgi
   stylesheets: ~/nagios3/stylesheets

The background color of the faulty service line display remains stable with a bright color for at least 3 days.
Then, it decreases and becomes pastel after 53 days with an intensity of 70% (100% is white and 0% is black).

 color-downtime:
   day-min:  3
   day-max: 50
   factor:   0.7

With key C<host-mapping>,
it's good to map C<localhost> to the real name of the computer (hostname).

 host-mapping:
   localhost:  srv-nagios
   toto:       titi

The only important key is C<remote-action>.
You can affiliate as many subkeys as you want.
Let's take an example:

 remote-action:
   oom-killer:
     regex: ^OOM Killer
     title:  OOM Killer
     command:     tssh -c 'sudo rm /var/lib/nagios3/nagios_oom_killer.log' %m
     command-one: ssh %m 'sudo rm /var/lib/nagios3/nagios_oom_killer.log'
     depend: ^SSH
     status: ALL
     style: bold

C<oom-killer> is just a key for your remote action.
The regex is used to find which service has a problem...
The title is used in the result web page (not mandatory - otherwise, it will be C<Action: oom-killer>).
The C<command> is just written on this web page.
It is your responsibility to copy and paste it into a terminal.
For security reasons, the nagios server does not have the right to launch the command on the remote host.
The wildcard C<%m> is replaced by the list of hosts (separated by spaces).
Sometimes, the command could be different if there is only one computer (just SSH and no parallel SSH).
If your command is based on SSH,
you can have an SSH action only if the remote SSH is running.
So you can make the remote action depend on the SSH service through a regular expression of your choice.

The last two keys.
The C<status> key is for CRITICAL or WARNING (or ALL).
The key C<style> is there to mark in bold the service in error on the web page.

=head1 SEE ALSO

yamllint(1), ysh(1), YAML, Nagios::StatusLog, Color::Calc

In Debian GNU/Linux distribution, packages for C<yamllint> and C<ysh> are:

=over

=item * C<yamllint> - Linter for YAML files (Python)

=item * C<libyaml-shell-perl> - YAML test shell (Perl)

=back


Project resources:

=over

=item * L<Web site|http://servforge.legi.grenoble-inp.fr/projects/soft-trokata/wiki/SoftWare/NagiosVelvice>

=item * L<Online Manual|http://servforge.legi.grenoble-inp.fr/pub/soft-trokata/nagios-velvice/velvice.html>

=item * L<SVN repository|http://servforge.legi.grenoble-inp.fr/svn/soft-trokata/trunk/nagios-velvice>

=back


=head1 VERSION

$Id: velvice.cgi 279 2018-07-15 10:38:51Z g7moreau $


=head1 AUTHOR

Written by Gabriel Moreau <Gabriel.Moreau(A)univ-grenoble-alpes.fr>, LEGI UMR 5519, CNRS, Grenoble - France


=head1 LICENSE AND COPYRIGHT

Licence GNU GPL version 2 or later and Perl equivalent

Copyright (C) 2014-2018, LEGI UMR 5519 / CNRS UGA G-INP, Grenoble, France
