#!/opt/vdops/bin/perl

# This script automates the process of watching one or more APC UPSes
# (which are undergoing run-time calibration) for alarms.  If it sees an
# alarm, then it cancels calibration and shrieks.  It does not itself
# initiate calibration -- it assumes that calibration has been initiated
# through some other mechanism

#   V      Who       When        What
# ---------------------------------------------------------------------------
# 1.2.0 skendric 2011-02-21 Upgrade to Netops 1.4.0
# 1.1.1 skendric 2010-06-25 Allow $error{$target} to be undef
# 1.1.0 skendric 2010-01-10 Upgrade to perl 5.10.1
# 1.0.1 skendric 2007-04-10 Enhance alarm detection
# 1.0.0 skendric 2007-04-02 First Version
#
# Author: Stuart Kendrick, sbk {put at sign here} skendric {put dot here} com
#
# Source: http://www.skendric.com/device
#
# This software is available under the GNU GENERAL PUBLIC LICENSE, see
# http://www.fsf.org/licenses/gpl.html
#
#
# This script takes the following approach:
# -Parses the hosts table for a list of targets (or accepts a command-
#  line list)
# -Sets the upsAdvTestRuntimeCalibration variable to performCalibration
#  on the first target
# -Poll upsAdvTestCalibrationResults variable until it returns ok
# -Checks for alarms, bails if there are any
# -Continues with the next target
#
# Requirements:
# -The target(s) must be pingable
#
# -PERL modules:  the WI::Netops collection
#
#
#
# Assumptions:
#
#
# Tested on:
# -APC 9606 Web/SNMP card, AP961x + AP963x Network Management Card,
#  SmartUPS, Symmetra
# -perl-5.12.2
# -net-snmp-5.6
#
#
# Instructions:
# -Customize the script for your site:  find the 'user-configurable
#  variables' section and modify as appropriate
# -Type "apc-watch-calibrate" to see the command-line options
# -Try it out
#
#
# Caveats:
#
#
# Known Bugs:
#
#
# To do:
#
#

# Begin script

# Load modules
use strict;
use warnings;
use feature 'say';
use feature 'switch';
use Carp qw(carp cluck croak confess);
use Data::Dumper;
use English qw( -no_match_vars );
use Getopt::Std;
use List::MoreUtils qw(any firstidx);
use WI::Netops::APCTools 1.2.2;
use WI::Netops::HostTools 1.0.4;
use WI::Netops::NetopsTools 2.2.3;
use WI::Netops::NetopsData 1.4.0;
use WI::Netops::PingTools 1.1.7;
use WI::Netops::SNMPTools 1.5.3;
use WI::Netops::Utilities 1.4.4;

# Declare global variables
my %calib_date;         # upsAdvTestCalibrationDate
my %calib_result;       # upsAdvTestCalibrationResults
my @only_these_models;  # Filters list of targets, discarding any whose
                        # model identifier isn't listed
my @operators;          # List of alpha pager identities to contact
                        # in event of issues
my $wait;               # Minutes to wait for device(s) to finish calibration

# Define global variables
$program_name = 'apc-watch-calibrate';
$usage        = 'Usage: apc-watch-calibrate -s {yes|no} [-d {integer}] [-r] [-a | -e {expr} | -f {filename} | target1 target2 target3 ...]';
$version      = '1.2.0';

# Define user-configurable variables

# Notification
#@operators = qw/sjones skendric tcoburn/;
# getlogin returns nothing when there is no controlling terminal (for
# example under cron), so fall back to the account name of the real UID
# and never push an undefined recipient onto the list
push @operators,
    grep { defined $_ && length $_ } ( getlogin || scalar getpwuid($<) );

# Target details
@only_these_models = qw//;

# Timing
$wait = 480;

# Grab arguments.  'r' is listed so that the -r switch advertised in
# $usage (and tested just below) is actually recognized by getopts.
getopts('ad:e:f:rs:', \%option);
@target = @ARGV;

# Set mode:  -r forces report mode; otherwise a terminal on STDIN means
# interactive and anything else means batch
if ($option{r}) {
    $mode = 'report';
}
elsif (-t STDIN) {
    $mode = 'interactive';
}
else {
    $mode = 'batch';
}


### Begin Main Program ###############################################

{
    check_args();     # Check arguments
    read_config();    # Read Netops config file
    compile_mibs();   # Compile MIB files
    build_target();   # Populate @target
    target_check();   # Look for errors in @target
    basic_info();     # Gather information
    info_before();    # Gather more information
    sanity_check();   # Check for major errors
    print_before();   # Tell operator what I will do
    do_the_work();    # Watch the UPSes
    info_after();     # Gather information
    print_after();    # Tell the operator what I did
}

##### End Main Program ###############################################



########################################################################
# Do the work: poll for alarms
########################################################################
sub do_the_work {

    # Poll every host in @target for alarms until $wait minutes have
    # elapsed or no hosts remain.  A host reporting any alarm other than
    # the ones expected during calibration has its calibration
    # cancelled, @operators are paged, and the host is dropped from
    # further polling.
    #
    # Arguments:  none (works from the script-level @target, @operators,
    #             $wait and the Netops settings)
    # Returns:    1

    # Alarms which are normal (or irrelevant) to a UPS under calibration
    my @expected_alarms = (
        'On Battery',
        'Low Battery',
        'Runtime Calibration',
        'Batteries Discharged',
        'Self Test In Progress',
        'Low Battery / On Battery',
    );

    # Debug trace
    trace_location('begin') if $debug;

    # Define local variables
    my @host     = @target;               # Hosts still being watched
    my $deadline = time() + $wait * 60;   # Moment at which polling stops
    my %page_arg = ( host => $snpp_host, recipient => \@operators );

    # Page the operators with $message; log $failure_note if the page
    # could not be sent (send_page returned a false or undefined value)
    my $notify = sub {
        my ($message, $failure_note) = @_;
        $page_arg{'message'} = $message;
        log_it($failure_note) unless send_page(\%page_arg);
        return;
    };

    # Notify operator
    say 'Watching devices for alarms...' if $mode eq 'interactive';
    $notify->(
        "Watching calibration on @target --$program_name",
        "Unable to notify operator of $program_name begin",
    );

    # When $dome is false, pause briefly and return without polling
    unless ($dome) {
        sleep $short;
        say 'just kidding' if $mode eq 'interactive';
        return 1;
    }

    # So long as we haven't exceeded wait time, keep polling
    POLL:
    while ( time() < $deadline ) {
        my %in_alarm;    # Hosts which reported an alarm during this pass

        # Loop through @host, polling for alarms
        HOST:
        for my $host (@host) {

            # Acquire alarm state; an undefined result counts as "none"
            my $alarm_ref = acquire_apc_alarms($host) // {};

            # Discard the alarms we expect to see during calibration
            delete @{$alarm_ref}{@expected_alarms};

            # Whatever is left is a real alarm; sorted so that the pager
            # text is the same from one run to the next
            my @alarms = sort keys %{$alarm_ref};
            next HOST unless @alarms;

            # Cancel calibration and shriek
            my $alarm_text = join ', ', @alarms;
            my $message    = cancel_apc_calibration($host)
                ? "$host reports alarm, calibration cancelled: $alarm_text --$program_name"
                : "$host reports an alarm, unable to cancel calibration: $alarm_text --$program_name";
            $notify->( $message, "Unable to page for alarm on $host" );

            # Flag host for removal
            $in_alarm{$host} = 1;
        }

        # Entertain operator
        print $BANG if $mode eq 'interactive';

        # If a UPS has entered alarm, remove it from @host
        @host = grep { !$in_alarm{$_} } @host;

        # Nothing left to watch:  no point in sleeping out the window
        last POLL unless @host;

        # Pause between polls
        sleep $long;
    }

    # Make things look pretty
    say('') if $mode eq 'interactive';

    # Notify operator
    say 'Done watching devices for alarms...' if $mode eq 'interactive';
    $notify->(
        "Done watching calibration on @target --$program_name",
        "Unable to notify operator of $program_name end",
    );
    say('') if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Gather information
########################################################################
sub info_before {

    # Record, for every target, the date and result of its previous
    # calibration in %calib_date and %calib_result.
    #
    # Arguments:  none
    # Returns:    1

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Gathering more information...');

    # Loop through targets
    for my $target (@target) {

        # Debug info
        say "Processing $target..." if $debug > 1;

        # Describe previous calibration event
        $calib_date{$target} = snmpGet(
            { host => $target, oid => 'upsAdvTestCalibrationDate.0' } );
        $calib_result{$target} = snmpGet(
            { host => $target, oid => 'upsAdvTestCalibrationResults.0' } );

        # Entertain operator
        print $BANG if $mode eq 'interactive';
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Gather information after calibration initiation
########################################################################
sub info_after {

    # Ping every target; for those which answer, note any alarms in
    # %error and refresh %calib_date / %calib_result.  Targets which do
    # not answer are marked 'Cannot ping' in %error and skipped.
    #
    # Arguments:  none
    # Returns:    1

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Pinging targets...');
    sleep $short;

    # Loop through targets, pinging
    TARGET:
    for my $target (@target) {

        # If we can't ping the target, skip to the next one
        unless ( ping_it($target) ) {
            print_it("\nCannot ping $target\n");
            $error{$target} = 'Cannot ping';
            next TARGET;
        }

        # Check to see if the UPS is in alarm.  do_the_work treats the
        # result of acquire_apc_alarms as a hash reference, and a
        # reference is true even when the hash is empty, so look at the
        # key count rather than at the reference itself.
        my $alarm_list = acquire_apc_alarms($target);
        my $has_alarms
            = ref($alarm_list) eq 'HASH'
            ? scalar keys %{$alarm_list}
            : $alarm_list;
        $error{$target} = $alarm_list if $has_alarms;

        # Describe previous calibration event
        $calib_date{$target} = snmpGet(
            { host => $target, oid => 'upsAdvTestCalibrationDate.0' } );
        $calib_result{$target} = snmpGet(
            { host => $target, oid => 'upsAdvTestCalibrationResults.0' } );
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Tell operator what I did
########################################################################
# NOTE(review): the remainder of this sub looks damaged in this copy of
# the file (the text after 'print <' appears to have lost its heredocs);
# the statements below are carried over unchanged -- restore the report
# section from a clean copy before relying on it.
sub print_after {
    my $shit_happens = 0;

    # Debug trace
    trace_location('begin') if $debug;

    # If running from cron, don't print report
    return 1 if $mode eq 'batch';

    say "\n# Here is what I see";

    # Iterate through @target
    for my $target (@target) {
        $shit_happens++ if defined $error{$target};
    }

    return 1 unless $mode eq 'interactive';

    print "\n";
    print < 0) { print
"\n\n\n\n"; print <