#!/opt/vdops/bin/perl # This script automates the process of initiating the 'calibration' test. # After building a list of APC UPSes, it initiates calibration on the first, # waits for calibration to complete, checks for alarms, and then continues # V Who When What # --------------------------------------------------------------------------- # 1.2.0 skendric 02-21-2011 Upgrade to Netops 1.4.0 # 1.1.3 skendric 12-17-2010 Automatically add the person running the script # to @operators # 1.1.2 skendric 11-15-2010 Keep operator informed via pages # 1.1.1 skendric 06-25-2010 Allow $error{$target} to be undef # 1.1.0 skendric 12-23-2009 Remove Net::SNMP support # 1.0.5 skendric 04-09-2007 Enhance alarm detection # 1.0.4 skendric 04-02-2007 Move cancel_calibration to APCTools.pm # 1.0.3 skendric 03-16-2007 Move acquire_apc_alarms to APCTools.pm # 1.0.2 skendric 03-09-2007 Handle AP9606 cards in poll # 1.0.1 skendric 03-06-2007 Poll alarm status while waiting for # calibration to complete # 1.0.0 skendric 12-06-2006 First Version # # Author: Stuart Kendrick, sbk {put at sign here} skendric {put dot here} com # # Source: http://www.skendric.com/device # # This software is available under the GNU GENERAL PUBLIC LICENSE, see # http://www.fsf.org/licenses/gpl.html # # # This script takes the following approach: # -Parses the hosts table for a list of targets (or accepts a command- # line list) # -Sets the upsAdvTestRuntimeCalibration variable to performCalibration # on the first target # -Poll upsAdvTestCalibrationResults variable until it returns ok # -Checks for alarms, bails if there are any # -Continues with the next target # # Requirements: # -The target(s) must be pingable # # -PERL modules: the WI::Netops collection # # # # Assumptions: # # # Tested on: # -APC 9606 Web/SNMP card, AP961x + AP963x Network Management Card, # SmartUPS, Symmetra # -perl-5.12.2 # -net-snmp-5.6 # # # Instructions: # -Customize the script for your site: find the 'user-configurable # variables' 
#     section and modify as appropriate
#    -Type "apc-seq-calibrate" to see the command-line options
#    -Try it out
#
#
# Caveats:
#
#
# Known Bugs:
#
#
# To do:
#
#


# Begin script

# Load modules
use strict;
use warnings;
use feature 'say';
use feature 'switch';
use Carp qw(carp cluck croak confess);
use Data::Dumper;
use English qw( -no_match_vars );
use Getopt::Std;
use List::MoreUtils qw(uniq);
use WI::Netops::APCTools 1.2.2;
use WI::Netops::HostTools 1.0.4;
use WI::Netops::NetopsTools 2.2.3;
use WI::Netops::NetopsData 1.4.0;
use WI::Netops::PingTools 1.1.7;
use WI::Netops::SNMPTools 1.5.3;
use WI::Netops::Utilities 1.4.4;

# Declare global variables
my %calib_date;         # upsAdvTestCalibrationDate
my %calib_result;       # upsAdvTestCalibrationResults
my @only_these_models;  # Filters list of targets, discarding any whose
                        # model identifier isn't listed
my @operators;          # List of alpha pager identities to contact
                        # in event of issues
my $wait;               # Minutes to wait for device to finish calibration

# Define global variables
# NOTE(review): $program_name, $usage, $version, %option, @target, $mode,
# %error, $debug, $dome, $short, $long, $BANG and $snpp_host are not declared
# in the visible part of this file; presumably they are exported by the
# WI::Netops modules loaded above -- confirm
$program_name = 'apc-seq-calibrate';
$usage        = 'Usage: apc-seq-calibrate -s {yes|no} [-d {integer}] [-a | -e {expr} | -f {filename} | target1 target2 target3 ...]';
$version      = '1.2.0';

# Define user-configurable variables

# Notification
#@operators = qw/sjones bsmith skendric/;
{
    # getlogin can come back undef or empty (for example when there is no
    # controlling terminal); fall back to the password database and never
    # push an unusable recipient
    my $login = getlogin() || getpwuid($REAL_USER_ID);
    push @operators, $login if defined $login && length $login;
}

# Target details
@only_these_models = qw//;

# Timing
$wait = 400;

# Grab arguments
getopts('ad:e:f:s:', \%option);
@target = @ARGV;

# Set mode
# NOTE(review): 'r' is not part of the getopts() spec above, so $option{r}
# looks unreachable unless %option is populated somewhere else -- confirm
if    ($option{r}) { $mode = 'report' }
elsif (-t STDIN)   { $mode = 'interactive' }
else               { $mode = 'batch' }


### Begin Main Program ###############################################
{
    check_args();     # Check arguments
    read_config();    # Read Netops config file
    compile_mibs();   # Compile MIB files
    build_target();   # Populate @target
    target_check();   # Look for errors in @target
    basic_info();     # Gather information
    sanity_check();   # Check for major errors
    info_before();    # Gather more information
    print_before();   # Tell operator what I will do
    do_the_work();    # Initiate calibration
    info_after();     # Gather information
    print_after();    # Tell the operator what I did
}
##### End Main Program ###############################################


########################################################################
# Do the work:  initiate calibration, poll for alarms
#
# Walks @target in order.  A target which already reports an alarm is
# skipped.  Otherwise calibration is initiated, poll() waits for it to
# end, the alarm state is read again and, if an alarm is present,
# @operators are paged and the remaining targets are left alone.
# When $dome is false nothing is initiated at all.
#
# Returns:  1
########################################################################
sub do_the_work {
    my %page_arg;   # Arguments for send_page routine

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print 'Initiating calibration...' if $mode eq 'interactive';

    # Dry run:  pretend for a moment, then leave before touching any UPS
    unless ($dome) {
        sleep $short;
        say 'just kidding' if $mode eq 'interactive';
        return 1;
    }
    say('');

    # Loop through @target, initiating calibration
    TARGET:
    for my $target (@target) {

        # Every target starts with clean flags, so that a failure on this
        # target cannot inherit a success recorded for the previous one
        my $initiate_result  = 0;   # Success/failure of snmpSet for
                                    # upsAdvTestRuntimeCalibration
        my $calibrate_result = 0;   # Success/failure of calibration
        my $alarm_result     = 0;   # Presence of alarms afterwards

        # Define %page_arg, just in case we need it
        %page_arg = (
            host      => $snpp_host,
            message   => "$target reports an alarm --apc-seq-calibrate",
            recipient => \@operators,
        );

        # Announce start
        log_it("Processing $target");
        if ($mode eq 'interactive') {
            say '--------------------------------------------------------';
            say "Processing $target";
        }

        # Check for pre-existing alarms
        if (acquire_apc_alarms($target)) {
            if ($mode eq 'interactive') {
                say "$target reports a pre-existing alarm, skipping";
            }
            log_it("$target reports a pre-existing alarm, skipping");
            next TARGET;
        }

        # Initiate calibration test
        $initiate_result = initiate_apc_calibration($target);

        # Poll until calibration finishes
        $calibrate_result = poll($target) if $initiate_result;

        # Look for alarms
        $alarm_result = acquire_apc_alarms($target) if $calibrate_result;

        # Notify operator.  A paging failure is only worth logging when a
        # page was actually attempted, i.e. when an alarm was seen
        if ($alarm_result) {
            my $page_result = send_page(\%page_arg);
            log_it("Unable to page for alarm on $target") unless $page_result;
        }

        # Announce completion
        log_it("Done processing $target");
        if ($mode eq 'interactive') {
            say "Done processing $target";
            say "-------------------------------------------------------\n\n"
        }

        # An alarm after calibration stops the whole run
        last TARGET if $alarm_result;
    }

    # Pause to allow last UPS to react to end of calibration
    sleep $long;
    sleep $long;

    # Make things look pretty
    say('') if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}


########################################################################
# Gather information
#
# Records, for every target, the date and result of its previous
# calibration in %calib_date and %calib_result, then finalizes the
# list of operators to page.  Dies if that list ends up empty.
#
# Returns:  1
########################################################################
sub info_before {

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Gathering more information...');

    # Loop through targets
    for my $target (@target) {

        # Describe previous calibration event
        $calib_date{$target} = snmpGet(
            {host => $target, oid => 'upsAdvTestCalibrationDate.0'}
        );
        $calib_result{$target} = snmpGet(
            {host => $target, oid => 'upsAdvTestCalibrationResults.0'}
        );

        # Entertain operator
        print $BANG if $mode eq 'interactive';
    }

    # Figure out who to page.  getlogin can return undef or an empty
    # string, so fall back to the password database and drop anything
    # unusable before checking that somebody is left to page
    my $login = getlogin() || getpwuid($REAL_USER_ID);
    push @operators, $login if defined $login && length $login;
    @operators = uniq grep { defined && length } @operators;
    die 'Operators is not defined' unless @operators > 0;

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}


########################################################################
# Gather information after calibration initiation
#
# Pings every target.  Unreachable targets are flagged in %error and
# skipped.  For the others, any alarm reported by acquire_apc_alarms
# is stored in %error, and %calib_date / %calib_result are refreshed.
#
# Returns:  1
########################################################################
sub info_after {

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Pinging targets...');
    sleep $short;

    # Loop through targets, pinging
    TARGET:
    for my $target (@target) {

        # If we can't ping the target, skip to the next one
        unless (ping_it($target)) {
            print_it("\nCannot ping $target\n");
            $error{$target} = 'Cannot ping';
            next TARGET;
        }

        # Check to see if the UPS is in alarm
        my $alarm_list = acquire_apc_alarms($target);
        $error{$target} = $alarm_list if $alarm_list;

        # Describe previous calibration event
        $calib_date{$target} = snmpGet(
            {host => $target, oid => 'upsAdvTestCalibrationDate.0'}
        );
        $calib_result{$target} = snmpGet(
            {host => $target, oid => 'upsAdvTestCalibrationResults.0'}
        );
    }

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}


########################################################################
# Query upsAdvTestCalibrationResults until either calibration finishes
# or we give up.  Also, query upsAdvStateAbnormalConditions.0 and/or
# upsBasicStateOutputState.0 for alarms.
# If we detect an alarm,
# abort the calibration and bail
#
# Argument:  host name of the UPS being calibrated
# Returns:   1 if the UPS reported 'ok', or if an unexpected alarm was
#            seen (calibration is then cancelled); 0 in every other
#            case (invalid result, timeout, lost contact, unknown reply)
########################################################################
sub poll {
    my $host = shift;

    my $alarm_ref;             # Hash ref of alarms returned by UPS under
                               # inspection
    my @alarms;                # List of alarms returned by UPS under
                               # inspection
    my $answer;                # Boolean identifying success/failure of
                               # calibration process
    my $calib_result;          # Response to query of
                               # upsAdvTestCalibrationResults
    my $miss = 0;              # Number of pings which the target missed
    my $poll_interval = 300;   # Seconds to sleep between two polls
    my $status = '';           # $calib_result with undef mapped to ''
    my $ticks;                 # Number of cycles to poll

    # Alarms which are normal (or irrelevant) for a UPS under calibration
    my @expected_alarms = (
        'On Battery',
        'Low Battery',
        'Runtime Calibration',
        'Batteries Discharged',
        'Self Test In Progress',
        'Low Battery / On Battery',
    );

    # Debug trace
    trace_location('begin') if $debug;

    # Define local variables:  $wait is expressed in minutes
    $ticks = int( $wait * 60 / $poll_interval );
    $ticks = 1 if $ticks < 1;

    # Pause to allow calibration to begin
    sleep $long;

    # Poll upsAdvTestCalibrationResults
    POLL:
    for my $tick (1 .. $ticks) {
        $calib_result = snmpGet(
            {host => $host, oid => 'upsAdvTestCalibrationResults.0'}
        );

        # An undefined reply is handled like any other unknown value,
        # without tripping 'uninitialized' warnings
        $status = $calib_result // '';

        # Analyze result
        if ($status eq 'ok') {
            log_it("Finished calibration on $host");
            say "\n Finished calibration on $host" if $mode eq 'interactive';
            last POLL;
        }
        elsif ($status eq 'invalid') {
            log_it("Invalid calibration on $host");
            say "Invalid calibration on $host" if $mode eq 'interactive';
            $error{$host} = 'Invalid calibration';
            last POLL;
        }
        elsif ($status eq 'inProgress') {
            log_it("Calibration in progress on $host");
            print $BANG if $mode eq 'interactive';
            $miss-- if $miss > 0;
        }
        elsif ($status eq '?') {
            log_it("$host missed an snmpGet for upsAdvTestCalibrationResults");
            print $DOT if $mode eq 'interactive';
            $miss++;
        }
        else {
            log_it("$host returned an undefined value for upsAdvTestCalibrationResults: $status");
            print $BANG if $mode eq 'interactive';
        }

        # If the host has missed too many snmpGets, bail
        if ($miss > 3) {
            log_it("$host has quit responding, bailing");
            say "\n$host has quit responding, bailing" if $mode eq 'interactive';
            last POLL;
        }

        # Acquire alarm state.  A false return is treated as "no alarms"
        # NOTE(review): assumes acquire_apc_alarms hands back a hash ref
        # keyed by alarm name -- confirm against WI::Netops::APCTools
        $alarm_ref = acquire_apc_alarms($host) || {};

        # Some alarms are normal (or irrelevant) to a UPS under calibration;
        # delete these
        delete @{$alarm_ref}{@expected_alarms};

        # If there any alarms left, record them
        push @alarms, sort keys %{$alarm_ref};
        last POLL if @alarms;

        # Sleep for a while
        sleep $poll_interval;
    }   # End 'Poll upsAdvTestCalibrationResults'

    # Analyze results of poll cycle
    if (@alarms) {
        log_it("$host reports alarms, aborting calibration:" . join ', ', @alarms);
        cancel_apc_calibration($host);

        # Report success so that the caller goes on to read the alarm
        # state and page the operators
        $answer = 1;
    }
    elsif ($status eq 'ok') {
        $answer = 1;
    }
    elsif ($status eq 'invalid') {
        $answer = 0;
    }
    elsif ($status eq 'inProgress') {
        log_it("Calibration on $host has exceeded $wait minutes, bailing");
        if ($mode eq 'interactive') {
            say "\nCalibration on $host has exceeded $wait minutes, bailing";
        }
        $answer = 0;
    }
    elsif ($status eq '?') {
        log_it("$host missing snmpGets");
        say "$host missing snmpGets" if $mode eq 'interactive';
        $answer = 0;
    }
    else {
        log_it("$host returning an undefined value: $status");
        say "$host returning an undefined value: $status" if $mode eq 'interactive';
        $answer = 0;
    }

    # Pause
    sleep $long;

    # Debug trace
    trace_location('end') if $debug;

    return $answer;
}


########################################################################
# Tell operator what I did
########################################################################
sub print_after {
    my $shit_happens = 0;

    # Debug trace
    trace_location('begin') if $debug;

    # If running from cron, don't print report
    return 1 if $mode eq 'batch';
    say "\n# Here is what I see";

    # Iterate through @target
    for my $target (@target) {
        $shit_happens++ if defined $error{$target};
    }
    return 1 unless $mode eq 'interactive';

    # Congratulate operator
    say "\n\nDone calibrating";
    print "\n";
    print < 0) { print "\n\n\n\n"; print <