Sophie

Sophie

distrib > Mageia > 3 > i586 > media > core-release-src > by-pkgid > d7663ff9e326418bf917f7f57b31b233 > files > 1

nagios-check_nfs4-0.2-5.mga3.src.rpm

#!/usr/bin/perl -w
#
# check_nfs4.pl
#
# Monitor NFSv4 servers (and clients)
#
# Usage:
#-------
# Use this plugin with NRPE:
#  - on the server config:
#       check_command check_nrpe!check_nfs4
#  - on the monitored host config:
#       command[check_nfs4]=/usr/local/bin/check_nfs4.pl [-s][-i]
#
# Performance data:
#------------------
# The performance data can be displayed in NagiosGraph with the following conf:
#    /perfdata:nfsd_cpu=(\d+)% nfsd_used_threads=(\d+)% io_read=(\d+)% io_write=(\d+)%/
#    and push @s, [ NFS4,
#               [ nfsd_cpu,             GAUGE, $1 ],
#               [ nfsd_used_threads,    GAUGE, $2 ],
#               [ io_read,              GAUGE, $3 ],
#               [ io_write,             GAUGE, $4 ] ];
#
#
# Released under GPL
# Copyright (c) 2005 Frederic Jolly
# Version 0.2
#

use strict;
use File::Basename;
use lib "/usr/local/nagios/libexec";
use utils qw($TIMEOUT %ERRORS &print_revision &support);
use vars qw($PROGNAME);

# Forward declarations: the subs are defined at the bottom of the file, but
# the main script body calls them earlier, so their prototypes must be known.
sub exit_error($$);   # (state name, status text): prints the result line and exits
sub print_help();     # prints the long help text
sub print_usage();    # prints the short usage summary

#
# Site configuration:
#--------------------
#  - $hang_timeout: number of seconds after which a mount is considered as not responding
#  - @logfiles:     the log files to scan; they must be readable by the nagios user
#  - $pblog:        the regular expression searched for in every log line
my $hang_timeout = 3;
my @logfiles     = ('/var/log/messages');
my $pblog        = '(rpc.mountd: refused mount|NFSD: Failure|NFSD: error)';


# Name printed in the usage, help and version output.
$PROGNAME = 'check_nfs4';

# Command-line switches (filled in by GetOptions) and their defaults.
my $opt_V   = '';     # -V | --version
my $opt_H   = '';     # -h | --help
my $client  = '';     # -i | --client
my $opt_S   = '';     # -s | --sec
my $opt_w   = 100;    # -w | --warning: nfsd CPU percentage threshold
my $verbose = 0;      # -v | --verbose (may be repeated)

# Result accumulated by the checks and finally printed by exit_error().
my $state  = 'OK';    # Nagios state name
my $status = '';      # human readable status text
my $perfs  = '|';     # performance data, starting with its separator

# Scrub the environment before running external commands (they are all
# called with an absolute path) and force the C locale for their output.
@ENV{'BASH_ENV', 'ENV', 'PATH'} = ('', '', '');
$ENV{'LC_ALL'} = 'C';

# Parse the command line.
#
# Every option has a bundled short form and a long form writing to the same
# variable. GetOptions returns false when it meets an unknown or malformed
# option; in that case the usage is printed and the plugin exits UNKNOWN
# instead of silently running with the defaults.
use Getopt::Long;
Getopt::Long::Configure('bundling');
my $options_ok = GetOptions(
    'V|version'   => \$opt_V,
    'h|help'      => \$opt_H,
    'i|client'    => \$client,
    's|sec'       => \$opt_S,
    'v|verbose+'  => \$verbose,
    'w|warning=s' => \$opt_w,
);
if (!$options_ok) {
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}

# The help text advertises "-w=xx"; with bundling the short option may
# receive the '=' as part of its value (TODO confirm against the installed
# Getopt::Long), so a single leading '=' is tolerated.
$opt_w =~ s/^=//;

# The threshold is compared numerically with the nfsd CPU usage: reject a
# non-numeric value, which would otherwise be evaluated as 0 and raise a
# WARNING on every run. Help/version requests are still served.
if (!$opt_H && !$opt_V && $opt_w !~ /^\d+(?:\.\d+)?$/) {
    exit_error('UNKNOWN', "invalid warning threshold: '$opt_w'\n");
}

# Informational switches: -h|--help prints the help text, -V|--version the
# revision. Help wins when both are given; either way the plugin exits OK
# without running any check.
if ($opt_H || $opt_V) {
    if ($opt_H) {
        print_help();
    }
    else {
        print_revision($PROGNAME, '$Revision: 0.2 $ ');
    }
    exit $ERRORS{'OK'};
}


# CPU consumed by nfsd (server mode only).
# The %cpu column printed by ps for every nfsd process is summed up; the sum
# goes to the performance data, to the status text when verbose, and raises
# a WARNING once it reaches the -w threshold.
unless ($client) {
    my @cpu_lines = `/bin/ps --no-heading -C nfsd -o %cpu`;
    if ($? == -1) {
        exit_error('UNKNOWN', "cannot find the command: '/bin/ps' or unknown options\n");
    }

    my $cpu = 0;
    foreach my $cpu_line (@cpu_lines) {
        $cpu += $cpu_line;
    }

    $perfs .= "nfsd_cpu=$cpu% ";
    $status .= "nfsd cpu = $cpu% ; " if $verbose;
    $state = 'WARNING' if $cpu >= $opt_w;
}


# Usage of the nfsd threads (server mode only).
# The last line of /proc/net/rpc/nfsd starting with "th" is split on
# whitespace: its second field is reported as the number of threads (verbose
# only) and, once the first three fields are dropped, every remaining
# non-zero field counts for 10% of "used threads".
# NOTE(review): presumably those remaining fields are the 10 buckets of the
# kernel thread-usage histogram - confirm against the kernel documentation.
unless ($client) {
    open(my $rpc_nfsd, '<', '/proc/net/rpc/nfsd') ||
        exit_error('UNKNOWN', "cannot read the file: '/proc/net/rpc/nfsd'\n");
    my @th_fields;
    while (my $entry = <$rpc_nfsd>) {
        @th_fields = split(' ', $entry) if $entry =~ /^th/;
    }
    close $rpc_nfsd;

    my $nb_th = $th_fields[1];
    $status .= "nfsd threads = $nb_th ; " if $verbose;

    my @buckets = @th_fields;
    splice(@buckets, 0, 3);
    my $busy_buckets = grep { $_ != 0 } @buckets;
    my $percent_used_th = $busy_buckets * 10;

    $status .= "nfsd used threads <= $percent_used_th% ; ";
    $perfs  .= "nfsd_used_threads=$percent_used_th%";
}


# Get transfer rates (server mode only).
# The share of READ and WRITE calls among all the NFSv3 calls counted on the
# "proc3" line of /proc/net/rpc/nfsd is added to the performance data.
if (!$client) {
    my (@iostat, $total, $read, $write);
    $total = $read = $write = 0;
    open(my $io_stat, '<', '/proc/net/rpc/nfsd') ||
	exit_error('UNKNOWN', "cannot read the file: '/proc/net/rpc/nfsd'\n");
    while (my $line = <$io_stat>) {
	if ($line =~ /^proc3/) { #will be proc4 later
	    @iostat = split(' ', $line);
	}
    }
    close $io_stat;

    # The line reads "proc3 <number of values> <value> ...": drop the label
    # AND the number of values, which is not a call counter and must not be
    # added to the total.
    splice(@iostat, 0, 2);
    foreach my $calls (@iostat) { $total += $calls; }

    # With the two leading fields removed the counters are indexed by NFSv3
    # procedure number: 6 is READ and 7 is WRITE.
    if ($total && @iostat > 7) {
	$read = int($iostat[6]*100/$total);
	$write = int($iostat[7]*100/$total);
    }
    $perfs = $perfs . " io_read=$read% io_write=$write%";
}


# Check if the daemons are running.
# The output of "ps aux" is searched for every daemon name. idmapd is always
# required; nfsd and mountd (plus svcgssd with -s) are required on a server,
# gssd (with -s) on a client. Any missing daemon makes the state CRITICAL.
my ($nfsd_d, $idmapd_d, $mountd_d, $svcgssd_d, $gssd_d, $process, $daelist);
# All the flags start at 0 (the original chain was broken by a ',' typed
# instead of '=', which left most of them undefined).
$nfsd_d = $idmapd_d = $mountd_d = $svcgssd_d = $gssd_d = 0;
$daelist = '';
$process = `/bin/ps aux`;
if ($? == -1) { exit_error('UNKNOWN', "cannot find the command: '/bin/ps'\n"); }
# The dots are escaped so that they only match a literal '.'.
if ($process =~ /nfsd\]/)       { $nfsd_d = 1; }
if ($process =~ /rpc\.idmapd/)  { $idmapd_d = 1; }
if ($process =~ /rpc\.mountd/)  { $mountd_d = 1; }
if ($process =~ /rpc\.svcgssd/) { $svcgssd_d = 1; }
if ($process =~ /rpc\.gssd/)    { $gssd_d = 1; }

# Collect the names of the required daemons that were not found.
my @missing;
if (!$idmapd_d) { push @missing, 'idmapd'; }
if (!$client) {
    if (!$nfsd_d) { push @missing, 'nfsd'; }
    if (!$mountd_d) { push @missing, 'mountd'; }
    if ($opt_S && !$svcgssd_d) { push @missing, 'svcgssd'; }
} else {
    if ($opt_S && !$gssd_d) { push @missing, 'gssd'; }
}

if (@missing) {
    $state = 'CRITICAL';
    $daelist = ' ' . join(' ', @missing);
    # Singular or plural wording depending on the number of missing daemons.
    if (@missing > 1) { $status = $status . "daemons$daelist are not running ; "; }
    else { $status = $status . "daemon$daelist is not running ; "; }
}

# Check rpc errors.
# The "rpc" line of /proc/net/rpc/nfs (client) or /proc/net/rpc/nfsd (server)
# is split on whitespace and some of its counters are inspected. A non-zero
# counter is reported in the status text (in detail when verbose, as a plain
# "RPC errors" otherwise); it does not change the plugin state.
my (@rpc, $rpc_file, $rpc_error);
$rpc_error = 0;
$rpc_file = $client ? '/proc/net/rpc/nfs' : '/proc/net/rpc/nfsd';
open(my $rpc_fh, '<', $rpc_file) ||
    exit_error('UNKNOWN', "cannot read the file: $rpc_file\n");
while (my $line = <$rpc_fh>) {
    if ($line =~ /^rpc/) { @rpc = split(' ', $line); }
}
close $rpc_fh;
shift(@rpc);    # drop the "rpc" label
# (The debug printf that used to dump the counters here was removed: it
# corrupted the first line of the plugin output and used data as a format.)

# Counters to inspect: [ index in @rpc, label used in the verbose status ].
# NOTE(review): the server labels are kept as the original author wrote
# them; verify the index/label mapping against the kernel's rpc statistics.
my @counters = $client
    ? ([1, 'Client retrans'],  [2, 'Client authrefrsh'])
    : ([1, 'Server badcalls'], [2, 'Server badauth'], [3, 'Server badclnt']);

foreach my $counter (@counters) {
    my ($index, $label) = @$counter;
    # A field missing from the line is treated as 0 instead of undef.
    my $value = defined $rpc[$index] ? $rpc[$index] : 0;
    next if $value == 0;
    $rpc_error = 1;
    if ($verbose) { $status = $status . "$label = $value ; "; }
}
if ($rpc_error && !$verbose) { $status = $status . "RPC errors ; "; }


# Detect if a mount point hangs.
# Every nfs4 mount point listed by /bin/mount is opened and read under an
# alarm of $hang_timeout seconds; a mount point that does not answer in time
# makes the state CRITICAL.
my (@mounts, @nfs4_mounts);
@mounts = `/bin/mount`;
if ($? == -1) { exit_error('UNKNOWN', "cannot find the command '/bin/mount'\n"); }
foreach my $entry (@mounts) {
    # Lines look like "<device> on <dir> type nfs4 (<options>)".
    if ($entry =~ /.*on ([^ ]*) type nfs4/) { push @nfs4_mounts, $1; }
}
foreach my $dir (@nfs4_mounts) {
    eval {
        local $SIG{ALRM} = sub { die "TIMEOUT\n" };
        alarm $hang_timeout;
        # Only read and close the directory when it could be opened, so that
        # an unreadable mount point does not trigger "invalid dirhandle"
        # warnings.
        if (opendir(my $dh, $dir)) {
            readdir($dh);
            closedir($dh);
        }
        alarm 0;
    };
    my $failure = $@;   # saved at once: $@ is a global
    alarm 0;            # make sure no alarm is left pending after the eval
    if ($failure) {
	$state = 'CRITICAL';
	# The status fragments are already "; "-terminated, so no leading
	# separator is needed here.
	$status = $status . "The mounted directory $dir is not responding ; ";
    }
}


# Check the logfiles.
# Every log file is scanned for lines matching $pblog. The inode and the
# position reached are stored in an offset file so that the next run resumes
# where this one stopped and does not report the same error twice. A match
# makes the state WARNING, unless it is already CRITICAL.
# NOTE(review): the offset file has a predictable name in /tmp; consider
# moving it to a directory writable only by the nagios user.
foreach my $logfile (@logfiles) {
    if (! -f $logfile) { exit_error('UNKNOWN', "cannot find file $logfile\n"); }
    my $offsetfile = '/tmp/.' . basename($logfile) . '.offset';

    my $log_fh;
    unless (open($log_fh, '<', $logfile)) { exit_error('UNKNOWN', "file $logfile can not be read\n"); }

    # Position saved by the previous run: the inode on the first line, the
    # offset on the second one. A missing, truncated or non-numeric offset
    # file is ignored and the log file is then read from its beginning.
    my ($inode, $offset) = (0, 0);
    if (open(my $offset_in, '<', $offsetfile)) {
        my @saved = <$offset_in>;
        close $offset_in;
        chomp @saved;
        if (@saved >= 2 && $saved[0] =~ /^\d+$/ && $saved[1] =~ /^\d+$/) {
            ($inode, $offset) = @saved[0, 1];
        }
    }

    # Stat the handle that is actually read, not the name, so that a
    # rotation happening meanwhile cannot mix up two different files.
    my ($ino, $size) = (stat $log_fh)[1, 7];
    unless (defined $size) {
        exit_error('UNKNOWN', "cannot get $logfile file size\n");
    }

    # Start over when the file was rotated (other inode) or truncated.
    if ($inode != $ino || $offset > $size) { $offset = 0; }
    seek($log_fh, $offset, 0);

    my $log_error = 0;
    while (my $logline = <$log_fh>) {
	chomp $logline;
	if ($logline =~ /$pblog/) {
	    $log_error = 1;
	    if ($verbose) { $status = $status . "\nLOG error: $logline ; "; }
	}
    }
    if ($log_error) {
	unless ($state eq 'CRITICAL') { $state = 'WARNING'; }
	if (!$verbose) { $status = $status . "LOG errors ; "; }
    }

    # Remember how far the file was read for the next run.
    my $position = tell $log_fh;
    close $log_fh;
    my $offset_out;
    unless (open($offset_out, '>', $offsetfile)) { exit_error('UNKNOWN', "file $offsetfile cannot be created\n"); }
    print {$offset_out} "$ino\n$position\n";
    close $offset_out;
}


# All the checks are done: print the result line (state, status text and
# performance data) and exit with the code matching the final state.
exit_error($state, $status);

#
# subroutines
#
# exit_error(STATE, TEXT)
#
# Print the plugin result line "STATE: TEXT" followed by the performance data
# collected so far in $perfs, then exit with the code %ERRORS associates with
# STATE. One trailing newline, then one trailing space, then one trailing ';'
# are stripped from TEXT before printing. Despite its name this sub is also
# used for the normal end of the plugin. It never returns.
sub exit_error ($$) {
    my ($the_state, $the_line) = @_;

    chomp $the_line;
    chop $the_line if $the_line =~ / $/;
    chop $the_line if $the_line =~ /;$/;

    print $the_state, ': ', $the_line, $perfs, " \n";
    exit $ERRORS{$the_state};
}

# print_help()
#
# Print the full help text on standard output: the revision, the copyright,
# the usage summary, the description of the options and of what the plugin
# checks, and finally the support notice provided by the utils module.
sub print_help () {
    print_revision($PROGNAME, '$Revision: 0.2 $ ');
    # The author's name is spelled in plain ASCII, as in the file header: the
    # accented characters it used to contain were mis-encoded and printed as
    # garbage.
    print "Copyright (c) 2005 Frederic Jolly\n\n";
    print "NFSv4 plugin for Nagios\n";
    print_usage();
    print "\n";
    print "   [-v]                    Verbose\n";
    print "   [-i | --client]         Monitor an NFSv4 client\n";
    print "   [-s | --sec]            Check also security features\n";
    print "   [-w=xx | --warning=xx]  Percentage of CPU consumed by nfsd above which a WARNING will result\n";
    print "\n";
    print "$PROGNAME monitors on an NFSv4 server (or on an NFSv4 client) the following NFSv4 features:\n";
    print "  - check if the daemons (server-side, client-side, security) are running\n";
    print "  - check if there are rpc errors\n";
    print "  - detect if a mount point hangs\n";
    print "  - check the percentage of CPU consumed by nfsd\n";
    print "  - check the number of effective used nfsd processes\n";
    print "  - grep some problematic strings from logs\n";
    print "\n";
    print "$PROGNAME returns also performance data:\n";
    print "  - the percentage of CPU consumed by nfsd\n";
    print "  - the number of effective used nfsd processes\n";
    print "  - the transfer rates\n";
    print "\n";
    support();
}

# print_usage()
#
# Print the short usage summary (the three ways of calling the plugin) on
# standard output.
sub print_usage () {
    my @usage_lines = (
        "Usage: \n",
        " $PROGNAME [-v] [-i | --client] [-s | --sec] [-w=xx | --warning=xx]\n",
        " $PROGNAME [-h | --help]\n",
        " $PROGNAME [-V | --version]\n",
    );
    print @usage_lines;
}