#!/opt/vdops/bin/perl

# This script queries Nokia IP boxes for hardware alarms and produces a report

# V     Who      When        What
# ---------------------------------------------------------------------------
# 1.4.0 skendric 2011-02-21 Upgrade to Netops 1.4.0
# 1.3.4 skendric 2010-12-30 Add @insane to report
# 1.3.3 skendric 2010-12-17 Futz with owner/owner_backup
# 1.3.2 skendric 2010-02-05 Upgrade to perl 5.10.1
# 1.3.1 skendric 2009-04-19 Distinguish between silent and unresponsive
# 1.3.0 skendric 04-17-2009 Track disk full and VRRP
# 1.2.1 skendric 03-20-2009 Add @down_for_maintenance
# 1.2.0 skendric 03-24-2008 Support IP560
# 1.1.4 skendric 12-07-2007 Add owner
# 1.1.3 skendric 03-21-2007 Stylistic mods
# 1.1.2 skendric 11-19-2006 Replace Object Values with OIDs
# 1.1.1 skendric 11-05-2005 Upgrade to new WI::VDOPS module structure
# 1.1.0 skendric 05-09-2005 Support Netops.pm-1.2
# 1.0.1 skendric 02-24-2005 Add detail to alarm notification e-mail
# 1.0.0 skendric 11-16-2004 First Version
#
#
# Author: Stuart Kendrick, sbk {put at sign here} skendric {put dot here} com
#
# Source: http://www.skendric.com/device
#
# This software is available under the GNU GENERAL PUBLIC LICENSE, see
# http://www.fsf.org/licenses/gpl.html
#
#
# This script takes the following approach:
#  -Parses the hosts table for a list of targets (or accepts a command-
#   line list)
#  -Queries a bunch of HOST-RESOURCES and NOKIA-IPSO-SYSTEM variables
#  -Produces a report
#
#
# Requirements:
#  -The target(s) must be pingable
#
#  -The following MIB modules stashed in /opt/vdops/share/snmp/mibs,
#   or wherever it is that you store MIB modules:
#     NOKIA-IPSO-SYSTEM-MIB.txt
#
#  -PERL modules: the WI::Netops collection
#
#
# Assumptions:
#
#
# Tested on:
#  -perl-5.12.2
#  -net-snmp-5.6
#
#
# Instructions:
#  -Customize the script for your site: find the 'user-configurable
#   variables' section and modify as appropriate
#  -Try it out
#
#
# Caveats:
#
#
# Known Bugs:
#
#
# To do:
#  -Add support for SNMPv3
#

# Begin script

# Load modules
use strict;
use warnings;
use feature 'say';
use feature 'switch';
use Carp qw(carp cluck croak confess);
use Data::Dumper;
use English qw( -no_match_vars );
use Getopt::Std;
use List::MoreUtils qw(any);
use Scalar::Util qw(looks_like_number);
use WI::Netops::HostTools 1.0.4;
use WI::Netops::NetopsTools 2.2.3;
use WI::Netops::NetopsData 1.4.0;
use WI::Netops::PingTools 1.1.7;
use WI::Netops::SNMPTools 1.5.3;
use WI::Netops::Utilities 1.4.4;

# Declare global variables
my %disk_status;                     # Status of disk
my %ipsoCardOperStatus;              # Status of cards
my %ipsoChassisTemperature;          # Status of chassis temperature
my %ipsoFanOperStatus;               # Status of fans
my %ipsoPowerSupplyOperStatus;       # Status of power supplies
my %ipsoPowerSupplyOverTemperature;  # Temperature of power supplies
my $max_cards;                       # Max number of cards report supports
my $max_fans;                        # Max number of fans report supports
my $max_ps;                          # Max number of ps report supports
my %num_cards;                       # Number of cards
my %num_fans;                        # Number of fans
my %num_ps;                          # Number power supplies
my %thermometer_status;              # Status of hottest thermometer
my $storage_threshold;               # Threshold of disk space utilization above
                                     # which we will whine
my %vrrp_status;                     # Health of VRRP configuration

# Define global variables
# NOTE(review): $program_name, $usage, $version, %option, @target, $mode,
# $debug, $dome and friends are not declared in this file; presumably they
# are exported by WI::Netops::NetopsData -- confirm.
$program_name = 'nokia-alarm';
$usage = 'Usage: nokia-alarm -s {yes|no} [-d {integer}] [-r] [-a | -e {expr} | -f {filename} | target1 target2 target3 ...]';
# NOTE(review): the change log above lists 1.4.0 as the latest revision, but
# the version string still says 1.3.4 -- confirm which is intended.
$version = '1.3.4';

# Storage threshold
$storage_threshold = 90;  # If the disk is more than 90% full, whine

# Nokia specifics
$max_cards = 5;
$max_fans  = 5;
$max_ps    = 2;

# Grab arguments
getopts('ad:e:f:rs:', \%option);
@target = @ARGV;

# Set mode
if    ($option{r}) { $mode = 'report' }
elsif (-t STDIN)   { $mode = 'interactive' }
else               { $mode = 'batch' }


### Begin Main Program ###############################################
{
    check_args();         # Check arguments
    read_config();        # Read Netops config file
    compile_mibs();       # Compile MIB files
    build_target();       # Populate @target
    target_check();       # Look for errors in @target
    basic_info();         # Gather information
    info_before();        # Gather more information
    sanity_check();       # Check for error conditions
    do_the_work();        # Do it
    identify_alarms();    # Count devices with alarms
    write_alarm_log();    # Record issues
    print_report();       # Print report
    notify_staff();       # Mail report
}
##### End Main Program #################################################



########################################################################
# Query variables
#
# Runs every acquire_* routine against each host in @target, filling the
# file-level status hashes and %alarm_count.  When $dome is false nothing
# is queried: the routine sleeps for $short seconds and returns.
#
# Returns: 1
########################################################################
sub do_the_work {

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Querying targets...');
    unless ($dome) {
        sleep $short;
        return 1;
    }

    # Loop through the list of targets
    for my $target (@target) {
        say "Processing $target" if $debug;
        acquire_card_status($target);
        acquire_disk_status($target);
        acquire_fan_status($target);
        acquire_power_status($target);
        acquire_temperature_status($target);
        acquire_vrrp_status($target);

        # Entertain operator
        print $BANG if $mode eq 'interactive';
    }

    # Debug info
    if ($debug > 2) {
        for my $target (@target) {
            if (defined $alarm_count{$target} and $alarm_count{$target} > 0) {
                say "alarm_count{$target} = $alarm_count{$target}";
            }
        }
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Acquire card status
#
# Param:   $host -- target to query (required; confesses if undefined)
# Effect:  appends one abbreviated status per card to
#          $ipsoCardOperStatus{$host}, pads the list to $max_cards entries
#          with $DASH, and bumps $alarm_count{$host} for every card whose
#          status is not 'ok'
# Returns: 1
########################################################################
sub acquire_card_status {
    my $host = shift;

    # Raw ipsoCardOperStatus value => abbreviation used in the report.
    # Anything not listed here is reported as '?'.
    my %status_for = (
        enabled  => 'ok',
        disabled => 'off',
    );

    # Debug trace
    trace_location('begin') if $debug;

    # Sanity check
    confess 'Must provide a host' unless defined $host;

    # Walk cards; an unknown card count is treated as zero cards
    for my $k (1 .. ($num_cards{$host} // 0)) {
        my $val = snmpGet({host => $host, oid => "ipsoCardOperStatus.$k"});
        $val //= $QUERY;

        # Hack
        my $status = $status_for{$val} // '?';

        # Increment counter
        $alarm_count{$host}++ unless $status eq 'ok';

        # Save result
        push @{$ipsoCardOperStatus{$host}}, $status;
    }

    # Fill out values with dashes
    $ipsoCardOperStatus{$host}[$_] //= $DASH for 0 .. $max_cards - 1;

    # Debug info
    say "$host card status @{$ipsoCardOperStatus{$host}}" if $debug > 1;

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Acquire disk status
#
# Param:   $host -- target to query (required; confesses if undefined)
# Effect:  stores an abbreviated disk status in $disk_status{$host} and
#          bumps $alarm_count{$host} when that status is not 'ok'.  When
#          the device reports 'running', disk utilization is also checked
#          against $storage_threshold ('full' when exceeded); if the
#          utilization cannot be read as a number the status becomes '?'.
# Returns: 1
########################################################################
sub acquire_disk_status {
    my $host = shift;

    # Raw hrDeviceStatus value => abbreviation used in the report
    my %status_for = (
        unknown => 'unk',
        running => 'ok',
        warning => 'warn',
        testing => 'test',
        down    => 'down',
    );

    # Debug trace
    trace_location('begin') if $debug;

    # Sanity check
    confess 'Must provide a host' unless defined $host;

    # Get disk status
    say "Getting hrDeviceStatus.3" if $debug > 3;
    my $val = snmpGet({host => $host, oid => 'hrDeviceStatus.3'});
    $val //= $QUERY;

    # Hack
    my $status = $status_for{$val} // '?';

    # Increment counter
    $alarm_count{$host}++ unless $status eq 'ok';

    # If disk status is ok, check space
    if ($status eq 'ok') {
        say 'Getting CHECKPOINT-MIB::diskPercent.0' if $debug > 3;
        $val = snmpGet(
            {host => $host, oid => '.1.3.6.1.4.1.2620.1.6.7.3.3.0'}
        );

        # A missing or non-numeric answer must not be compared numerically:
        # it would count as 0 and silently report a healthy disk.  Treat it
        # as unknown, the same way the other acquire_* routines do.
        if (not defined $val or not looks_like_number($val)) {
            $alarm_count{$host}++;
            $status = '?';
        }
        elsif ($val > $storage_threshold) {
            $alarm_count{$host}++;
            $status = 'full';
        }
    }

    # Save result
    $disk_status{$host} = $status;

    # Debug info
    say "$host disk status = $disk_status{$host}" if $debug > 1;

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Acquire fan status
#
# Param:   $host -- target to query (required; confesses if undefined)
# Effect:  appends one abbreviated status per fan to
#          $ipsoFanOperStatus{$host}, pads the list to $max_fans entries
#          with $DASH, and bumps $alarm_count{$host} for every fan whose
#          status is not 'ok'
# Returns: 1
########################################################################
sub acquire_fan_status {
    my $host = shift;

    # Raw ipsoFanOperStatus value => abbreviation used in the report
    my %status_for = (
        running      => 'ok',
        notRunning   => 'off',
        notAvailable => 'na',
    );

    # Debug trace
    trace_location('begin') if $debug;

    # Sanity check
    confess 'Must provide a host' unless defined $host;

    # Walk fans; an unknown fan count is treated as zero fans
    for my $k (1 .. ($num_fans{$host} // 0)) {
        my $val = snmpGet(
            {host => $host, oid => "ipsoFanOperStatus.$k"}
        );
        $val //= $QUERY;

        # Hack
        my $status = $status_for{$val} // '?';

        # Increment counter
        $alarm_count{$host}++ unless $status eq 'ok';

        # Save result
        push @{$ipsoFanOperStatus{$host}}, $status;
    }

    # Fill out values with dashes
    $ipsoFanOperStatus{$host}[$_] //= $DASH for 0 .. $max_fans - 1;

    # Debug info
    say "$host fan status @{$ipsoFanOperStatus{$host}}" if $debug > 1;

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Acquire power supply information
#
# Param:   $host -- target to query (required; confesses if undefined)
# Effect:  appends one abbreviated status per power supply to
#          $ipsoPowerSupplyOperStatus{$host}, pads the list to $max_ps
#          entries with $DASH, and bumps $alarm_count{$host} for every
#          supply whose status is not 'ok'
# Returns: 1
########################################################################
sub acquire_power_status {
    my $host = shift;

    # Raw ipsoPowerSupplyOperStatus value => abbreviation used in the report
    my %status_for = (
        running      => 'ok',
        notRunning   => 'off',
        notAvailable => 'na',
    );

    # Debug trace
    trace_location('begin') if $debug;

    # Sanity check
    confess 'Must provide a host' unless defined $host;

    # Walk power supplies looking for status; an unknown count is treated
    # as zero supplies
    for my $k (1 .. ($num_ps{$host} // 0)) {
        say "Getting ipsoPowerSupplyOperStatus.$k" if $debug > 3;
        my $val = snmpGet(
            {host => $host, oid => "ipsoPowerSupplyOperStatus.$k"}
        );
        $val //= $QUERY;

        # Hack
        my $status = $status_for{$val} // '?';

        # Increment counter
        $alarm_count{$host}++ unless $status eq 'ok';

        # Save result
        push @{$ipsoPowerSupplyOperStatus{$host}}, $status;
    }

    # Fill out values with dashes
    $ipsoPowerSupplyOperStatus{$host}[$_] //= $DASH for 0 .. $max_ps - 1;

    # Debug info
    say "$host power supply status = @{$ipsoPowerSupplyOperStatus{$host}}"
        if $debug > 1;

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Acquire temperature information
#
# Param:   $host -- target to query (required; confesses if undefined)
# Effect:  records a per-power-supply over-temperature status in
#          $ipsoPowerSupplyOverTemperature{$host} (padded to $max_ps
#          entries with $DASH), the chassis status in
#          $ipsoChassisTemperature{$host}, and the worst of all of them in
#          $thermometer_status{$host}.  $alarm_count{$host} is bumped for
#          every power supply thermometer that is not 'ok'; the chassis
#          reading does not touch the counter here.
# Returns: 1
########################################################################
sub acquire_temperature_status {
    my $host = shift;
    my @temperatures;    # List of all 'status' values seen for this host

    # Raw ipsoPowerSupplyOverTemperature value => report abbreviation
    my %ps_status_for = (
        normal          => 'ok',
        overTemperature => 'hot',
        notAvailable    => 'na',
    );

    # Raw ipsoChassisTemperature value => report abbreviation
    my %chassis_status_for = (
        normal          => 'ok',
        overTemperature => 'hot',
    );

    # Debug trace
    trace_location('begin') if $debug;

    # Sanity check
    confess 'Must provide a host' unless defined $host;

    # Walk thermometers looking for temperature alarms
    for my $k (1 .. ($num_ps{$host} // 0)) {
        my $val = snmpGet(
            {host => $host, oid => "ipsoPowerSupplyOverTemperature.$k"}
        );
        $val //= $QUERY;

        # Hack
        my $status = $ps_status_for{$val} // '?';

        # Increment counter
        $alarm_count{$host}++ unless $status eq 'ok';

        # Save result
        push @{$ipsoPowerSupplyOverTemperature{$host}}, $status;
        push @temperatures, $status;
    }

    # Consult chassis temperature
    my $chassis_val = snmpGet(
        {host => $host, oid => 'ipsoChassisTemperature.0'}
    );
    $chassis_val //= $QUERY;

    # Hack
    my $chassis_status = $chassis_status_for{$chassis_val} // '?';
    $ipsoChassisTemperature{$host} = $chassis_status;
    push @temperatures, $chassis_status;

    # Record the worst temperature
    my $worst;
    if    (any { $_ eq 'hot' } @temperatures) { $worst = 'hot'; }
    elsif (any { $_ eq '?' }   @temperatures) { $worst = $QUERY; }
    elsif (any { $_ eq 'na' }  @temperatures) { $worst = 'na'; }
    elsif (any { $_ eq 'ok' }  @temperatures) { $worst = 'ok'; }
    $thermometer_status{$host} = $worst;

    # Fill out values with dashes.  The statuses were pushed onto a 0-based
    # array, so the padding must cover slots 0 .. $max_ps - 1 (the same
    # convention as the card, fan and power supply routines).
    $ipsoPowerSupplyOverTemperature{$host}[$_] //= $DASH
        for 0 .. $max_ps - 1;

    # Debug info
    if ($debug > 1) {
        say "$host temperature:";
        say " power supply temp status @{$ipsoPowerSupplyOverTemperature{$host}}";
        say " chassis temp status = $ipsoChassisTemperature{$host}";
        say " worst thermometer status = $thermometer_status{$host}";
    }

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Acquire VRRP status
#
# Param:   $host -- target to query (required; confesses if undefined)
# Effect:  stores the VRRP health in $vrrp_status{$host}:
#            'ok'    every vrrpOperAdminState row is 'up' and every
#                    vrrpOperPreemptMode row is 'true'
#            'down'  a row reported 'down'
#            $QUERY  a row reported anything else
#            'not'   VRRP is up but some row is not set to preempt
#                    (this case also bumps $alarm_count{$host})
#          The status stays undefined when the admin-state walk yields no
#          rows.
# Returns: 1
########################################################################
sub acquire_vrrp_status {
    my $host = shift;
    my $status;    # Worst status
    my $vb;        # Varbind from snmp walk

    # Debug trace
    trace_location('begin') if $debug;

    # Sanity check
    confess 'Must provide a host' unless defined $host;

    # Acquire VRRP operational status
    $vb = snmpWalk( {host => $host, oid => 'vrrpOperAdminState'} );

    # Walk varbind, extracting worst status; stop at the first row that is
    # not 'up'
    OPER:
    for my $varbind (@{ $vb // [] }) {
        my $val = $varbind->{val};
        if (defined $val and $val eq 'up') {
            $status = 'ok';
            next OPER;
        }
        $status = (defined $val and $val eq 'down') ? 'down' : $QUERY;
        last OPER;
    }

    # If VRRP is functioning, verify that all interfaces are configured to
    # preempt.  A healthy walk leaves $status at 'ok' (never 'up'), so that
    # is the value to test for here.
    if (defined $status and $status eq 'ok') {
        $vb = snmpWalk( {host => $host, oid => 'vrrpOperPreemptMode'} );

        PREEMPT:
        for my $varbind (@{ $vb // [] }) {
            my $val = $varbind->{val};
            unless (defined $val and $val eq 'true') {
                $alarm_count{$host}++;
                $status = 'not';
                last PREEMPT;
            }
        }
    }

    # Save result
    $vrrp_status{$host} = $status;

    # Debug info
    if ($debug > 1) {
        my $shown = $vrrp_status{$host} // 'undef';
        say "$host VRRP status = $shown";
    }

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Gather information ... and perform more error checking
#
# For every host in @target, walks the card, fan and power supply index
# tables and records the row counts in %num_cards, %num_fans and %num_ps.
# A walk that returns nothing is counted as zero.
#
# Returns: 1
########################################################################
sub info_before {

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Gathering more information...');

    # Walk target, counting cards, fans, and power supplies
    for my $target (@target) {
        my ($card_ref, $fan_ref, $ps_ref);

        # Debug info
        say "Processing $target" if $debug > 1;

        # Count number of cards
        # NOTE(review): the '// []' guards assume snmpWalk may return undef
        # on failure -- confirm against WI::Netops::SNMPTools.
        $card_ref = snmpWalk( {host => $target, oid => 'ipsoCardIndex'} );
        $num_cards{$target} = @{ $card_ref // [] };
        say " $target contains $num_cards{$target} cards" if $debug > 1;

        # Count number of fans
        $fan_ref = snmpWalk( {host=>$target, oid => 'ipsoFanIndex'} );
        $num_fans{$target} = @{ $fan_ref // [] };
        say " $target contains $num_fans{$target} fans" if $debug > 1;

        # Count number of power supplies
        $ps_ref = snmpWalk( {host => $target, oid => 'ipsoPowerSupplyIndex'} );
        $num_ps{$target} = @{ $ps_ref // [] };
        say " $target contains $num_ps{$target} power supplies" if $debug > 1;

        # Entertain operator
        print $BANG if $mode eq 'interactive';
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Tell the operator what I discovered
########################################################################
sub print_report {
    my $handle;
    my $total = @target;
    my $now = get_now();

    # If we are running in test mode, skip this routine
    unless ($dome) {
        print_it("Running in test mode, cannot print a meaningful report\n");
        return 1;
    }

    # Debug trace
    trace_location('begin') if $debug;

    # Direct output to screen or to file
    if ($mode eq 'interactive') {
        $handle = *STDOUT;
    }
    else {
        open $handle, '>', $report_file
            or die "Cannot open $report_file: $!\n";
    }

    print {$handle} <