Nagios sam-query Plugin

From GridPP Wiki
Jump to: navigation, search

check_lcg_same.pl

This Nagios check script uses the lcg-sam-client tool (already installed on user interfaces and worker nodes) to query the SAM test database and retrieve the latest test results.

#!/usr/bin/perl
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# check_lcg_same v0.1
# By c.a.j.brew@rl.ac.uk
#
# Thanks to mp@xmission.com who wrote the excellent 
# check_traceroute-pure_perl plugin, which this plugin was modeled after.
#
#############################################################################

use strict;
use Getopt::Long;

# Plugin version, shown in the -v banner.
my $version = "v0.1";

# Command-line options, filled in by GetOptions below.
my $opt_v;            # -v: print the version banner; do_check also echoes the query when set
my $opt_n = undef;    # -n: host to check (defaults to this machine, see below)
my $opt_help;         # -h / --help: show the help text
my $opt_s = undef;    # -s: service abbreviation to query (required)

# Nagios plugin return codes.  UNKNOWN must be 3: a value of -1 reaches the
# OS as exit status 255, which is outside the range Nagios understands.
my %ERRORS = (
    'OK'       => 0,
    'WARNING'  => 1,
    'CRITICAL' => 2,
    'UNKNOWN'  => 3,
);

# Set this to whatever you like, but make sure you don't hang Nagios
# for too long.
my $timeout = 30;

GetOptions(
    "v"    => \$opt_v,
    "help" => \$opt_help,
    "h"    => \$opt_help,
    "s=s"  => \$opt_s,
    "n=s"  => \$opt_n,
);

# No host given: fall back to the fully qualified name of the local machine.
unless ($opt_n) {
    $opt_n = `hostname -f`;
    # Backticks yield undef if the command could not be started at all.
    $opt_n = '' unless defined $opt_n;
    chomp $opt_n;
}

if ($opt_v) {
    print "\nThis is check_lcg_same version $version\n";
    print "\n";
    # Address matches the author line in the file header.
    print "Please report errors to c.a.j.brew\@rl.ac.uk";
    print "\n\n";
}

#subs	
# Write the multi-line help text (purpose of the plugin plus the option
# summary) to the currently selected output handle.  Takes no arguments
# and does not exit; the caller decides what happens next.
sub print_help () {
    # One entry per output line; every entry is terminated with a newline.
    my @help_lines = (
        "",
        "",
        "This is check_lcg_same.pl. It is designed to send an alert",
        "to Nagios if a LCG SAME reports a problem with a paticular service",
        "on a host.",
        "",
        "Usage:",
        "",
        "--help Display this help.",
        "-v Display the version number of check_lcg_same.",
        "-n Host that you wish to check the service on.",
        "-s Service on that host that you wish to query.",
    );
    print join('', map { "$_\n" } @help_lines);
}

# Print the one-line usage summary and terminate the plugin.
# Never returns.  Exits with 3, the Nagios UNKNOWN status; the previous
# exit(-1) reached the OS as status 255, which is not a valid plugin
# return code.
sub usage() {
    print "check_lcg_same -n <host> -s <service>\n";
    exit(3);
}

# Run same-query for the host in $opt_n and the service in $opt_s, print a
# one-line status message and return the matching entry of %ERRORS:
#   output starting with "ok"   -> OK
#   output starting with "na"   -> WARNING
#   output starting with "down" -> CRITICAL
#   anything else               -> UNKNOWN
# When $opt_v is set, the command line, its exit status ($?) and the raw
# output are printed before the status message.
sub do_check() {
    # The query tool is run with HOME pointing at /tmp.
    # NOTE(review): presumably it needs a writable home directory - confirm.
    $ENV{'HOME'} = "/tmp";

    # Host and service are interpolated into a shell command line below, so
    # refuse anything that is not a plain name before it reaches the shell.
    for my $value ($opt_n, $opt_s) {
        unless (defined $value && $value =~ /\A[A-Za-z0-9_.\-]+\z/) {
            print "ERROR: Invalid host or service name\n";
            return $ERRORS{"UNKNOWN"};
        }
    }

    my $command = "/opt/lcg/same/client/bin/same-query";
    my $command_args = "servicestatus nodename=$opt_n serviceabbr=$opt_s voname=ops servicestatusvo=OPS";

    # stderr is merged into stdout so that error text is captured as well.
    my $qry_result = `$command $command_args 2>&1`;
    $qry_result = '' unless defined $qry_result;

    if ($opt_v) {
        print "$command $command_args" . " $?\n";
        # Previously printed to the never-opened handle OUT, which silently
        # discarded the raw query output.
        print "$qry_result\n";
    }

    if ($qry_result =~ /^ok/) {
        print "OK: SAM reports $opt_s on $opt_n is ok\n";
        return $ERRORS{"OK"};
    }
    if ($qry_result =~ /^na/) {
        print "Warning: SAM reports $opt_s on $opt_n is na\n";
        return $ERRORS{"WARNING"};
    }
    if ($qry_result =~ /^down/) {
        print "Critical: SAM reports $opt_s on $opt_n is down\n";
        return $ERRORS{"CRITICAL"};
    }

    print "ERROR: Do not understand response from SAME query\n";
    return $ERRORS{"UNKNOWN"};
}

# Main program.  It is placed after the sub definitions because the subs
# are declared with prototypes, and perl warns when a prototyped sub is
# called before its definition has been compiled.
if ($opt_help) {
    print_help();
    # Help was requested explicitly: stop here instead of falling through
    # to the usage error or running the check as well.
    exit $ERRORS{"UNKNOWN"};
}

# A service abbreviation is mandatory; usage() does not return.
usage() unless ($opt_s);

# Guard against a hanging query: when the alarm fires, report the timeout
# and leave with the UNKNOWN status.
$SIG{'ALRM'} = sub {
    print("ERROR: No response from SAME SERVER (timeout) in $timeout seconds\n");
    exit $ERRORS{"UNKNOWN"};
};
alarm($timeout);

# The value returned by do_check becomes the plugin's exit status.
exit do_check();

Chris brew 09:58, 8 Feb 2007 (GMT)