#!/opt/vdops/bin/perl

# This script queries switch power parameters, compares them to internally
# defined thresholds, and produces a report

# V     Who      When        What
# ---------------------------------------------------------------------------
# 1.3.0 skendric 2011-03-31 Use cefcTotalxxx rather than cefcFRUTotalxxx
#                           This supports gear which doesn't do PoE
# 1.2.0 skendric 2011-02-21 Upgrade to Netops 1.4.0
# 1.1.3 skendric 2010-12-30 Add @insane to report
# 1.1.2 skendric 2010-12-17 Futz with owner/owner_backup
# 1.1.1 skendric 2010-10-06 Handle bug in Cat37xx in which mgmt agent
#                           claims the power supply is off
# 1.1.0 skendric 2010-02-07 Upgrade to perl 5.10.1
# 1.0.4 skendric 2009-03-20 Add @down_for_maintenance
# 1.0.3 skendric 2008-01-14 Ignore utilization over 100%
# 1.0.2 skendric 2007-12-07 Add owner
# 1.0.1 skendric 2007-03-19 Add RFC3761 support
# 1.0.0 skendric 2007-03-09 First Version
#
#
# Author: Stuart Kendrick, sbk {put at sign here} skendric {put dot here} com
#
# Source: http://www.skendric.com/device
#
# This software is available under the GNU GENERAL PUBLIC LICENSE, see
# http://www.fsf.org/licenses/gpl.html
#
#
# This script takes the following approach:
#  -Parses the hosts table for a list of targets (or accepts a command-
#   line list)
#  -Queries a bunch of CISCO-ENTITY-FRU-CONTROL-MIB specific variables
#  -Produces a report
#
#
# Requirements:
#  -The target(s) must be pingable
#
#  -The following MIB modules stashed in /opt/vdops/share/snmp/mibs,
#   or wherever it is that you store MIB modules:
#     CISCO-PRODUCTS-MIB.my
#
#  -PERL modules: the FHCRC::Netops collection
#
#
# Assumptions:
#
#
# Tested on:
#  -perl-5.12.2
#  -net-snmp-5.6
#
#
# Instructions:
#  -Customize the script for your site: find the 'user-configurable
#   variables' section and modify as appropriate
#  -Try it out
#
#
# Caveats:
#
#
# Known Bugs:
#
#
# To do:
#  -Add support for SNMPv3
#

# Begin script

# Load modules
use strict;
use warnings;
use feature 'say';
use feature 'switch';
use Carp qw(carp cluck croak confess);
use Data::Dumper;
use English qw( -no_match_vars );
use Getopt::Std;
use POSIX qw(ceil);
use FHCRC::Netops::CiscoTools 1.4.3;
use FHCRC::Netops::HostTools 1.0.4;
use FHCRC::Netops::NetopsTools 2.2.3;
use FHCRC::Netops::NetopsData 1.4.0;
use FHCRC::Netops::PingTools 1.1.7;
use FHCRC::Netops::SNMPTools 1.5.3;
use FHCRC::Netops::Utilities 1.4.4;

# Declare global variables.  All hashes keyed by target
my %amp_factor;          # Flavor of amps, derived from cefcPowerUnits
my %drawn_inline_power;  # cefcFRUDrawnInlineCurrent
my %drawn_system_power;  # cefcTotalDrawnCurrent
my %iid;                 # iid of cefcPowerRedundancyMode
my %inline_percent;      # drawn_inline_power/total_inline_power
my $inline_threshold;    # threshold at which we'll whine
my %poe_mib;             # Identifies which POE-related MIB this target
                         # supports: 'rfc3621' or 'cisco' (for
                         # CISCO-ENTITY-FRU-CONTROL-MIB)
my %ps_model;            # Hash of arrays of power supply model names
my %redundancy_mode;     # cefcPowerRedundancyMode
my %system_percent;      # drawn_system_power/total_system_power
my %total_inline_power;  # cefcFRUTotalInlineCurrent
my %total_system_power;  # cefcTotalAvailableCurrent
my $system_threshold;    # threshold at which we'll whine
my %volts;               # Volts, derived from cefcPowerUnits

# Define global variables
$program_name = 'switch-power-alarm';
$usage = 'Usage: switch-power-alarm -s {yes|no} [-d {integer}] [-r] [-a | -e {expr} | -f {filename} | target1 target2 target3 ...]';
$version = '1.3.0';

# Thresholds
$inline_threshold = 80;
$system_threshold = 85;

# Grab arguments
getopts('ad:e:f:rs:', \%option);
@target = @ARGV;

# Set mode
if    ($option{r}) { $mode = 'report' }
elsif (-t STDIN)   { $mode = 'interactive' }
else               { $mode = 'batch' }


### Begin Main Program ###############################################

{
    check_args();       # Check arguments
    read_config();      # Read Netops config file
    compile_mibs();     # Compile MIB files
    build_target();     # Populate @target
    target_check();     # Look for errors in @target
    basic_info();       # Gather information
    sanity_check();     # Check for error conditions
    info_before();      # Gather more information
    do_the_work();      # Do it
    identify_alarms();  # Count devices with alarms
    write_alarm_log();  # Record issues
    print_report();     # Print report
    notify_staff();     # Mail report
}

##### End Main Program #################################################



########################################################################
# Acquire CISCO-ENTITY-FRU-CONTROL-MIB POE-related variables
#
# Takes:    a target name
# Fills in: %total_system_power, %drawn_system_power,
#           %total_inline_power, %drawn_inline_power, %amp_factor,
#           %iid, %volts and %redundancy_mode for that target
# Returns:  1 on success, 0 if a required variable could not be read
#           (the per-target hashes may then be partially populated;
#           info_before prunes such targets)
########################################################################

sub acquire_cisco_var {
    my $target = shift;
    my $result = 0;

    # Debug trace
    trace_location('begin') if $debug;

    # Debug info
    say " Processing $target" if $debug > 1;

    # Every failure path leaves this bare block with 'last', so the
    # closing trace and the return below are always reached
    ACQUIRE: {

        # Acquire cefcPowerRedundancyMode.  Guard against a failed or empty
        # walk: without an iid the remaining queries cannot be built
        my $walk = snmpWalk( {host => $target, oid => 'cefcPowerRedundancyMode'} );
        unless (defined $walk and @{$walk} > 0 and defined $walk->[0]->{iid}) {
            say "\n$target does not support cefcPowerRedundancyMode, ignoring" if $debug;
            last ACQUIRE;
        }

        # Extract the first cefcPowerRedundancyMode and iid
        my $iid           = $walk->[0]->{iid};
        my $reported_mode = $walk->[0]->{val};
        say " iid = $iid, redundancy_mode = $reported_mode" if $debug > 2;

        # Acquire cefcPowerUnits
        my $power_unit = snmpGet( {host => $target, oid => "cefcPowerUnits.$iid"} );
        unless (defined $power_unit) {
            say "\n$target does not support cefcPowerUnits, ignoring" if $debug;
            last ACQUIRE;
        }
        say " power_unit = $power_unit" if $debug > 2;

        # Extract amp_factor and volts
        my ($amp_factor, $volts) = parse_cefc_power_units($power_unit);
        unless (defined $amp_factor and defined $volts) {
            say "\nCannot parse cefcPowerUnits for $target" if $debug;
            last ACQUIRE;
        }

        # Debug info
        if ($debug > 2) {
            say " amp_factor = $amp_factor";
            say " volts = $volts";
        }

        # Acquire cefcTotalAvailableCurrent
        my $val = snmpGet( {host => $target, oid => "cefcTotalAvailableCurrent.$iid"} );
        unless (defined $val and $val ne $EMPTY_STR and $val > 0) {
            say "\n$target does not support cefcTotalAvailableCurrent" if $debug;
            last ACQUIRE;
        }
        $total_system_power{$target} = ceil($val * $amp_factor * $volts);
        say "total_system_power = $total_system_power{$target}" if $debug > 2;

        # Acquire cefcTotalDrawnCurrent
        $val = snmpGet( {host => $target, oid => "cefcTotalDrawnCurrent.$iid"} );
        unless (defined $val and $val ne $EMPTY_STR and $val > 0) {
            say "\n$target does not support cefcTotalDrawnCurrent" if $debug;
            last ACQUIRE;
        }
        $drawn_system_power{$target} = ceil($val * $amp_factor * $volts);
        say "drawn_system_power = $drawn_system_power{$target}" if $debug > 2;

        # Acquire cefcFRUTotalInlineCurrent.  Inline values are optional:
        # gear which doesn't do PoE simply gets a dash
        # NOTE(review): inline power is scaled by a fixed 50 rather than
        # $volts -- presumably the nominal PoE voltage; confirm
        $val = snmpGet( {host => $target, oid => "cefcFRUTotalInlineCurrent.$iid"} );
        if (defined $val and $val ne $EMPTY_STR and $val > 0) {
            $total_inline_power{$target} = ceil($val * $amp_factor * 50);
        }
        else {
            $total_inline_power{$target} = $DASH;
        }
        say "total_inline_power = $total_inline_power{$target}" if $debug > 2;

        # Acquire cefcFRUDrawnInlineCurrent
        $val = snmpGet( {host => $target, oid => "cefcFRUDrawnInlineCurrent.$iid"} );
        if (defined $val and $val ne $EMPTY_STR and $val > 0) {
            $drawn_inline_power{$target} = ceil($val * $amp_factor * 50);
        }
        else {
            $drawn_inline_power{$target} = $DASH;
        }
        say "drawn_inline_power = $drawn_inline_power{$target}" if $debug > 2;

        # Save this stuff
        $amp_factor{$target} = $amp_factor;
        $iid{$target}        = $iid;
        $volts{$target}      = $volts;

        # Abbreviate the redundancy mode; anything unrecognized becomes '?'
        my %letter_for = (redundant => 'r', combined => 'c');
        $redundancy_mode{$target} = $letter_for{ $reported_mode // q{} } // '?';

        # Success
        $result = 1;
    }

    # Debug trace
    trace_location('end') if $debug;

    return $result;
}



########################################################################
# Acquire RFC3621 POE-related variables
#
# Takes:    a target name
# Fills in: %total_inline_power, %drawn_inline_power and
#           %redundancy_mode for that target; the system power hashes
#           are set to $DASH because this MIB does not supply them
# Returns:  1 on success, 0 if a required variable could not be read
########################################################################

sub acquire_rfc3621_var {
    my $target = shift;
    my $result = 0;

    # Debug trace
    trace_location('begin') if $debug;

    # Debug info
    say " Processing $target" if $debug > 1;

    # Every failure path leaves this bare block with 'last', so the
    # closing trace and the return below are always reached
    ACQUIRE: {

        # Acquire pethMainPsePower
        my $val = snmpGet( {host => $target, oid => 'pethMainPsePower.1'} );
        unless (defined $val) {
            say "\n$target does not support pethMainPsePower.1, ignoring" if $debug;
            last ACQUIRE;
        }

        # Extract pethMainPsePower
        ($total_inline_power{$target} = $val) =~ s/ Watts//;
        say " total_inline_power = $total_inline_power{$target}" if $debug > 2;

        # Acquire pethMainPseOperStatus
        $val = snmpGet( {host => $target, oid => 'pethMainPseOperStatus.1'} );
        unless (defined $val) {
            say "\n$target does not support pethMainPseOperStatus.1, ignoring" if $debug;
            last ACQUIRE;
        }

        # Hack pethMainPseOperStatus into the redundancy_mode slot;
        # anything unrecognized becomes $QUERY
        my %status_for = (on => 'on', off => 'off', faulty => 'bad');
        $redundancy_mode{$target} = $status_for{$val} // $QUERY;
        say " redundancy_mode = $redundancy_mode{$target}" if $debug > 2;

        # Acquire pethMainPseConsumptionPower
        say 'Getting pethMainPseConsumptionPower.1' if $debug > 3;
        $val = snmpGet( {host => $target, oid => 'pethMainPseConsumptionPower.1'} );
        unless (defined $val) {
            say "\n$target does not support pethMainPseConsumptionPower.1, ignoring" if $debug;
            last ACQUIRE;
        }

        # Extract pethMainPseConsumptionPower
        ($drawn_inline_power{$target} = $val) =~ s/ Watts//;
        say " drawn_inline_power = $drawn_inline_power{$target}" if $debug > 2;

        # Fill out system power variables
        $total_system_power{$target} = $DASH;
        $drawn_system_power{$target} = $DASH;

        # Success
        $result = 1;
    }

    # Debug trace
    trace_location('end') if $debug;

    return $result;
}



########################################################################
# Turn a drawn/total pair into a utilization percentage
#
# Takes:   a label and target name (used only for debug output), the
#          drawn value and the total value
# Returns: $DASH if either value is $DASH or the total is zero, '???'
#          if the result exceeds 100, otherwise the percentage rounded up
########################################################################

sub _percent_used {
    my ($label, $t, $drawn, $total) = @_;

    return $DASH if $drawn eq $DASH or $total eq $DASH;

    # A zero total would otherwise die with "Illegal division by zero"
    return $DASH if $total == 0;

    my $percent = ceil( ($drawn / $total) * 100 );
    say sprintf(' %s{%s} = %s', $label, $t, $percent) if $debug > 3;

    # Ignore utilization over 100%
    return $percent > 100 ? '???' : $percent;
}



########################################################################
# Query variables
#
# For every target: computes %system_percent and %inline_percent,
# bumps %alarm_count when a threshold is exceeded, and bumps it again
# when the recorded power supply state is anything other than healthy.
# Always returns 1.
########################################################################

sub do_the_work {

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Querying targets...');
    unless ($dome) {
        sleep $short;
        return 1;
    }

    # Loop through the list of targets
    for my $t (@target) {
        say "Processing $t" if $debug;

        # Calculate system percent
        $system_percent{$t} = _percent_used(
            'system_percent', $t,
            $drawn_system_power{$t}, $total_system_power{$t},
        );

        # Calculate inline percent
        $inline_percent{$t} = _percent_used(
            'inline_percent', $t,
            $drawn_inline_power{$t}, $total_inline_power{$t},
        );

        # Compare to thresholds
        unless ($system_percent{$t} eq $DASH or $system_percent{$t} eq '???') {
            $alarm_count{$t}++ if $system_percent{$t} > $system_threshold;
        }
        unless ($inline_percent{$t} eq $DASH or $inline_percent{$t} eq '???') {
            $alarm_count{$t}++ if $inline_percent{$t} > $inline_threshold;
        }

        # Check redundancy mode
        my $ps_state = $redundancy_mode{$t} // q{};
        if ($ps_state eq 'r' or $ps_state eq 'on') {
            # Life is fine
        }
        elsif ($ps_state eq 'off') {

            # I don't believe that a Cat37xx can report an 'off' power supply ...
            # if its power supply is off, I figure it isn't reporting much of
            # anything.  Nevertheless, under 12.2(53)SE2, Cat37xx do precisely
            # this.  Ignore it.
            if ($box{$t} eq 'cat37xxStack') {
                say "$t reports an off power supply; I do not believe it" if $debug;
                log_it("$t reports an off power supply; I do not believe it");
            }
            else {
                say "$t reports that its power supply is off" if $debug;
                log_it("$t reports that its power supply is off");
                $alarm_count{$t}++;
            }
        }
        elsif ($ps_state eq 'bad') {
            say "$t reports a faulty power supply" if $debug;
            log_it("$t reports a faulty power supply");
            $alarm_count{$t}++;
        }
        else {
            say "$t reports $redundancy_mode{$t} for its power supply" if $debug;
            log_it("$t reports $redundancy_mode{$t} for its power supply");
            $alarm_count{$t}++;
        }

        # Entertain operator
        print $BANG if $mode eq 'interactive';
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Gather more information
#
# For every target: records up to two power supply model names in
# %ps_model, then reads the power variables through whichever MIB
# sanity_check selected.  Targets which fail either step are pruned.
# Always returns 1.
########################################################################

sub info_before {
    my @remove;

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Gathering more information...');

    # Loop through targets, deriving current and voltage information
    TARGET:
    for my $target (@target) {
        my @ps_model;

        # Acquire entPhysicalModelName
        my $val = snmpWalk( {host => $target, oid => 'entPhysicalModelName'} );
        unless (defined $val) {
            say "\n$target does not support entPhysicalModelName, ignoring" if $debug;
            push @remove, $target;
            print $DOT if $mode eq 'interactive';
            next TARGET;
        }

        # Extract entPhysicalModelName
        for my $varbind (@$val) {
            my $entity = $varbind->{val};
            my $model;
            if    ($entity =~ /PWR/) { ($model) = ($entity) =~ /-(\w+)$/ }
            elsif ($entity =~ /PS/)  { ($model) = ($entity) =~ /WS-(\w+)/ }
            push @ps_model, $model if defined $model;
        }

        # The report shows two power supply slots; pad or patch them
        for my $slot (0 .. 1) {
            my $model = $ps_model[$slot];
            if (not defined $model) {
                $ps_model[$slot] = $DASH;
            }
            elsif ($model eq $EMPTY_STR or $model eq $SPACE) {
                $ps_model[$slot] = $QUERY;
            }
        }
        $ps_model{$target} = \@ps_model;

        # Query whichever MIB this box supports.  Start from failure for
        # every target so that an unrecognized MIB type cannot inherit the
        # previous target's result
        my $result  = 0;
        my $poe_mib = $poe_mib{$target} // q{};
        if    ($poe_mib eq 'rfc3621') { $result = acquire_rfc3621_var($target) }
        elsif ($poe_mib eq 'cisco')   { $result = acquire_cisco_var($target) }

        # Entertain the operator
        if ($result) {
            print $BANG if $mode eq 'interactive';
        }

        # Otherwise, skip to the next target
        else {
            push @remove, $target;
            print $DOT if $mode eq 'interactive';
            next TARGET;
        }
    }

    # Remove entries which failed checks
    prune_local(@remove);
    prune_basic(@remove);

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Tell the operator what I discovered
#
# Writes to STDOUT in interactive mode, otherwise to $report_file
########################################################################

sub print_report {
    my $handle;
    my @target_list;           # Version of @target sorted according to POE
                               # MIB type
    my $total = @target;
    my $now = get_now();

    # Debug trace
    trace_location('begin') if $debug;

    # Build @target_list
    {
        my (@cisco, @rfc3621, @other);
        for my $host (keys %poe_mib) {
            my $poe_mib = $poe_mib{$host} // q{};
            if    ($poe_mib eq 'cisco')   { push @cisco, $host }
            elsif ($poe_mib eq 'rfc3621') { push @rfc3621, $host }
            else                          { push @other, $host }
        }
        @cisco   = sort @cisco;
        @rfc3621 = sort @rfc3621;
        @other   = sort @other;
        @target_list = (@cisco, @rfc3621, @other);
    }

    # Direct output to screen or to file
    if ($mode eq 'interactive') {
        $handle = *STDOUT;
    }
    else {
        open $handle, '>', $report_file or die "Cannot open $report_file: $!\n";
    }

    # NOTE(review): the statement below appears to be damaged in this copy
    # of the file -- the report header and the per-target output loop look
    # to be missing.  The surviving fragment is kept verbatim; restore it
    # from a clean copy.
    print {$handle} <[0], $ps_model{$target}->[1]; }

    unless ($handle =~ /STDOUT/) {
        close $handle or warn "Cannot close $report_file: $!\n";
    }

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Remove entries.  Sometimes, the operator gives us target(s) which I
# cannot process.
# Instead of crashing & burning, remove these
# from the list
########################################################################

sub prune_local {
    my @remove = @_;
    my @nuked;

    # Every per-target hash this script maintains
    my @per_target_tables = (
        \%amp_factor,         \%drawn_inline_power, \%drawn_system_power,
        \%iid,                \%inline_percent,     \%poe_mib,
        \%ps_model,           \%redundancy_mode,    \%total_inline_power,
        \%total_system_power, \%system_percent,     \%volts,
    );

    # Debug trace
    trace_location('begin') if $debug;

    # Check to see if we have work to do
    if (@remove > 0) {

        # Make things look pretty
        say('') if $mode eq 'interactive';

        # Remove entries which failed checks
        for my $doomed (@remove) {
            say "Removing $doomed" if $debug;
            delete $_->{$doomed} for @per_target_tables;
            push @nuked, $doomed;
        }
    }

    # Debug trace
    trace_location('end') if $debug;

    # Hand back the names we dropped
    return @nuked;
}



########################################################################
# Sanity check
#
# Works out which POE-related MIB each target answers to and records
# it in %poe_mib; when both answer, 'cisco' wins because it is probed
# last.  Targets answering to neither are handed to prune_basic.
# Always returns 1.
########################################################################

sub sanity_check {
    my @remove;

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Sanity check...');

    # Loop through targets, looking for problems
    TARGET:
    for my $target (@target) {

        # Identify which POE MIB to use
        my $walk = snmpWalk( {host => $target, oid => 'pethMainPsePower'} );
        if (defined $walk and @$walk > 0) {
            $poe_mib{$target} = 'rfc3621';
        }

        say 'Walking cefcPowerRedundancyMode' if $debug > 3;
        $walk = snmpWalk( {host => $target, oid => 'cefcPowerRedundancyMode'} );
        if (defined $walk and @$walk > 0) {
            $poe_mib{$target} = 'cisco';
        }

        # One of them worked: note it and move on
        if (defined $poe_mib{$target}) {

            # Debug info
            say " $target supports $poe_mib{$target}" if $debug > 1;

            # Entertain operator
            print $BANG if $mode eq 'interactive';
            next TARGET;
        }

        # If neither of these work, skip it
        say "I do not see POE-related values in $target, skipping" if $debug;
        push @remove, $target;
        print $DOT if $mode eq 'interactive';
    }

    # Make things look pretty
    say('') if $mode eq 'interactive';

    # Remove entries which failed checks
    prune_basic(@remove);

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Output help
########################################################################

sub HELP_MESSAGE { print <