#!/opt/vdops/bin/perl

# This script parses the APC PowerNet-MIB file and creates a swatch.conf
# for monitoring traps

# V     Who      When       What
# ---------------------------------------------------------------------------
# 1.7.0 skendric 03-23-2011 Expand to four categories:  minor, major, severe
#                           critical, eliminate @no_throttle
# 1.6.1 skendric 03-04-2011 Finally get throttling by host right
# 1.6.0 skendric 08-03-2010 Add @no_throttle, for particularly sensitive
#                           traps
# 1.5.5 skendric 07-30-2010 Upgrade to perl-5.10.1
# 1.5.4 skendric 09-15-2008 Add EMU sensor connect/disconnect traps
# 1.5.3 skendric 09-03-2008 Add PDU traps
# 1.5.2 skendric 06-18-2008 Remove communicationEstablished
# 1.5.1 skendric 03-07-2008 Migrate from throttle to threshold
# 1.5.0 skendric 04-12-2007 Simplify
# 1.4.0 skendric 04-09-2007 Enumerate only traps for gear we own.  Divide
#                           traps into minor/major/critical.  Employ
#                           category-specific throttling
# 1.3.7 skendric 04-06-2007 Enumerate all traps
# 1.3.6 skendric 03-09-2007 Migrate back to old-style throttling
# 1.3.5 skendric 12-20-2006 Support Swatch-3.2.1 thresholding
# 1.3.4 skendric 12-19-2006 Employ 'mail_em'
# 1.3.3 skendric 12-03-2006 Stylistic mods
# 1.3.2 skendric 11-05-2005 Upgrade to new FHCRC::VDOPS module structure
# 1.3.1 skendric 10-31-2005 Sort @trap
# 1.3.0 skendric 10-26-2005 Skip little UPSes
# 1.2.4 skendric 10-14-2005 More excludes
# 1.2.3 skendric 08-08-2005 Add perlcode
# 1.2.2 skendric 08-07-2005 Make throttling work
# 1.2.1 skendric 07-17-2005 Add throttling
# 1.2.0 skendric 04-03-2005 Add variable defining who gets what
# 1.1.1 skendric 04-01-2005 Just grab trap
# 1.1.0 skendric 03-31-2005 Use Tie::File
# 1.0.1 skendric 03-31-2005 Add facilities notifications
# 1.0.0 skendric 03-28-2005 First Version
#
# Author:  Stuart Kendrick, sbk {put at sign here} skendric {put dot here} com
#
# Source:  http://www.skendric.com/monitor
#
# This software is available under the GNU GENERAL PUBLIC LICENSE, see
# http://www.fsf.org/licenses/gpl.html
#
#
# This script takes the following approach:
# -Find the first TRAP-TYPE line and records the English OID
# -Searches further and records the associated SUMMARY line
# -Repeats
# -Produces a swatch.conf file
#
# NOTE(review): the approach above appears to predate 1.4.0; the code below
# enumerates traps by hand in %priority_of rather than reading them from the
# MIB -- confirm and update this description.
#
#
# Requirements:
# -swatch.conf contains the following:
#     # Use this variable to stuff the hostname into $1
#     perlcode my $grab_host = '^\w+\s+\d+\s+\d\d:\d\d:\d\d\s+(\w+)';
#
#
#
# Assumptions:
#
#
# Tested on:
# -perl-5.12.2
#
#
# Instructions:
# -Customize the script for your site:  find the 'user-configurable
#  variables' section and modify as appropriate
# -Type "build-swatch-config" to see the command-line options
# -Try it out
#
#
#
# Caveats:
#
#
# Known Bugs:
#
#
# To do:
#
#


# Begin script

# Load modules
use v5.12.0;
use strict;
use warnings FATAL => 'all';
use feature 'say';
use feature 'switch';
use Carp qw(carp cluck croak confess);
use Data::Dumper;
use English qw( -no_match_vars );
use Getopt::Std;
use List::MoreUtils qw(any);
use FHCRC::Netops::HostTools 1.0.5;
use FHCRC::Netops::NetopsData 1.4.4;
use FHCRC::Netops::NetopsTools 2.3.1;
use FHCRC::Netops::Utilities 1.4.5;


# Declare global variables
my %device;             # Hash of @target, keyed by IP
my %ignore_device;      # Hash of the devices we will ignore, keyed by IP
my @mail_recipients;    # List of addresses which want to receive e-mail
                        # notification
my @page_recipients;    # List of pager ids who want to receive alpha page
                        # notification
my $output_file;        # Location of swatch.conf to create
my %priority_of;        # Hash of priorities, keyed by trap
my @skip_model;         # List of APC models to ignore
my %throttle_of;        # Throttle time keyed by trap
my %throttle_def_of;    # Throttle definitions keyed by priority


# Define global variables
# ($program_name, $usage and $version are not declared in this file;
# presumably they are exported by the FHCRC::Netops modules -- verify.)
$program_name = 'build-swatch-config';
$usage        = 'Usage: build-swatch-config -s {yes|no} [-d {integer}] [-z {netops_global_config}] [-y {netops_alarm_config}] [-a | -e {expr} | -f {filename} | target1 target2 target3 ...]';

# Kept in step with the newest entry in the change log above
$version      = '1.7.0';


# Priority informs how we alarm and how we throttle
#             Minor      Major          Severe        Critical
# Email       7x24       7x24           7x24          7x24
# Page        Never      M-F 8am-5pm    7x24          7x24
# Throttle    8 hours    1 hour         10 minutes    1 minute
#
# NOTE(review): the generated config pages 'major' traps with
# when=1-5:7-18 and throttles 'critical' traps for 90 seconds; reconcile
# those values with the table above.

# APC Stuff
# Keys are PowerNet-MIB trap names and must match the MIB spelling exactly
# (including the MIB's own misspellings), because each key is embedded in a
# watchfor pattern.
%priority_of = (

    # Minor
    apcEnvSensorConnected => 'minor',
    apcEnvSensorDisconnected => 'minor',
    atsCommunicationEstablished => 'minor',
    atsCommunicationLost => 'minor',
    atsOverCurrentCleared => 'minor',
    atsOverCurrent => 'minor',
    atsPowerSupplyFailureCleared => 'minor',
    atsPowerSupplyFailure => 'minor',
    atsSourceSwitched => 'minor',
    atsLostRedundancy => 'minor',
    atsRedundancyRestored => 'minor',
    batteryPackCommLost => 'minor',
    batteryPackCommEstablished => 'minor',
    bmBatManCommEstab => 'minor',
    bmBatManCommLost => 'minor',
    bmBatManMaintenanceAlarmCleared => 'minor',
    bmBatManMaintenanceAlarm => 'minor',
    bmBatManKneeAlarmCleared => 'minor',
    bmBatManKneeAlarm => 'minor',
    bmBatManEnvironmentAlarmCleared => 'minor',
    bmBatManEnvironmentAlarm => 'minor',
    bmsWarningConditionCleared => 'minor',
    bmsWarningCondition => 'minor',
    calibrationStart => 'minor',
    calibrationEnd => 'minor',
    envSTIncTempRateViolationCleared => 'minor',
    envSTIncTempRateViolation => 'minor',
    envSTDecTempRateViolationCleared => 'minor',
    envSTDecTempRateViolation => 'minor',
    envMinTempThresholdViolationCleared => 'minor',
    envMinTempThresholdViolation => 'minor',
    envMinHumidityThresholdViolationCleared => 'minor',
    envMinHumidityThresholdViolation => 'minor',
    envMaxTempThresholdViolationCleared => 'minor',
    envMaxTempThresholdViolation => 'minor',
    envMaxHumidityThresholdViolationCleared => 'minor',
    envMaxHumidityThresholdViolation => 'minor',
    envLowTempThresholdViolationCleared => 'minor',
    envLowTempThresholdViolation => 'minor',
    envLowHumidityThresholdViolationCleared => 'minor',
    envLowHumidityThresholdViolation => 'minor',
    envLTIncTempRateViolationCleared => 'minor',
    envLTIncTempRateViolation => 'minor',
    envLTDecTempRateViolationCleared => 'minor',
    envLTDecTempRateViolation => 'minor',
    envHighTempThresholdViolationCleared => 'minor',
    envHighTempThresholdViolation => 'minor',
    # NOTE(review): no matching envHighHumidityThresholdViolation entry --
    # confirm whether that omission is intentional
    envHighHumidityThresholdViolationCleared => 'minor',
    gracefullShutdown => 'minor',
    iemLowTempThresholdViolationCleared => 'minor',
    iemLowTempThresholdViolation => 'minor',
    iemLowHumidThresholdViolationCleared => 'minor',
    iemLowHumidThresholdViolation => 'minor',
    iemHighTempThresholdViolationCleared => 'minor',
    iemHighTempThresholdViolation => 'minor',
    iemHighHumidThresholdViolationCleared => 'minor',
    iemHighHumidThresholdViolation => 'minor',
    iemContactFaultCleared => 'minor',
    iemContactFault => 'minor',
    humidityThresholdViolationCleared2 => 'minor',
    humidityThresholdViolationCleared1 => 'minor',
    humidityThresholdViolation2 => 'minor',
    humidityThresholdViolation1 => 'minor',
    rPDUPowerSupply1Fail => 'minor',
    rPDUPowerSupply1Ok => 'minor',      # letter O, not zero
    rPDUPowerSupply2Fail => 'minor',
    rPDUPowerSupply2Ok => 'minor',      # letter O, not zero
    smartAvrReducing => 'minor',
    smartBoostOff => 'minor',
    smartAvrReducingOff => 'minor',
    upsBatteryReplaced => 'minor',
    upsCalibrationStackChanged => 'minor',
    upsDiagnosticsFailed => 'minor',
    upsDipSwitchChanged => 'minor',
    upsBatteryNeedsReplacement => 'minor',

    # Major
    batteryIncrease => 'major',
    batteryDecrease => 'major',
    extBatteryFrameIncease => 'major',
    extBatteryFrameDecrease => 'major',
    externalSwitchGearClosed => 'major',
    externalSwitchGearOpened => 'major',
    intelligenceModuleInserted => 'major',
    intelligenceModuleRemoved => 'major',
    passwordChanged => 'major',
    powerModuleIncrease => 'major',
    powerModuleDecrease => 'major',
    rPDULowLoad => 'major',
    rPDULowLoadCleared => 'major',
    smartRelayFaultCleared => 'major',
    temperatureThresholdViolationCleared2 => 'major',
    temperatureThresholdViolationCleared1 => 'major',
    temperatureThresholdViolation2 => 'major',
    temperatureThresholdViolation1 => 'major',
    upsBatteryOverVoltageCleared => 'major',
    upsBatteryOverVoltage => 'major',
    upsBackfeedRelayFaultCleared => 'major',
    upsBackfeedRelayFault => 'major',
    upsBypassRelayFaultCleared => 'major',
    upsBypassRelayFault => 'major',
    upsBatteryTempSensorFaultCleared => 'major',
    upsBatteryTempSensorFault => 'major',
    upsBatterySoftBusStartFaultCleared => 'major',
    upsBatterySoftBusStartFault => 'major',

    # Severe
    abnormalConditionCleared => 'severe',
    abnormalCondition => 'severe',
    baseFanFailureCleared => 'severe',
    baseFanFailure => 'severe',
    badVoltageCleared => 'severe',
    badVoltage => 'severe',
    bmBatManBatteryAlarmCleared => 'severe',
    bmBatManBatteryAlarm => 'severe',
    bmsCriticalConditionCleared => 'severe',
    bmsCriticalCondition => 'severe',
    bypassPowerSupplyFailure => 'severe',
    chargerFailureCleared => 'severe',
    chargerFailure => 'severe',
    dischargeCleared => 'severe',
    noBatteries => 'severe',
    noBatteriesCleared => 'severe',
    returnFromLowBattery => 'severe',
    smwCriticalConditionCleared => 'severe',
    smwCriticalCondition => 'severe',
    upsDischarged => 'severe',
    upsInverterOverTemperatureCleared => 'severe',
    upsInverterOverTemperature => 'severe',
    upsInverterFaultCleared => 'severe',
    upsInverterFault => 'severe',
    upsInternalOverTemperatureCleared => 'severe',
    upsInternalOverTemperature => 'severe',
    upsOverloadCleared => 'severe',
    upsOverload => 'severe',
    upsOutputSwitchOpened => 'severe',
    upsOutputSwitchClosed => 'severe',
    upsPfcInputRelayFailureCleared => 'severe',
    upsPfcInputRelayFailure => 'severe',
    upsPfcFaultCleared => 'severe',
    upsPfcFault => 'severe',
    upsRebootStarted => 'severe',
    upsSleeping => 'severe',
    upsTurnedOn => 'severe',
    upsTurnedOff => 'severe',
    upsWokeUp => 'severe',

    # Critical
    batteryOverTemperatureCleared => 'critical',
    batteryOverTemperature => 'critical',
    hardwareFailureBypass => 'critical',
    softwareBypass => 'critical',
    returnFromBypass => 'critical',
    switchedBypass => 'critical',
    upsOnBattery => 'critical',
    powerRestored => 'critical',
);

# Files
$output_file = '/opt/vdops/etc/swatch/swatch-ups-conf';

# Notification
@mail_recipients = qw/zapper-watchers/;
@page_recipients = qw/zapper-responders/;

# Target details
# Each entry is used as a regex against the device model in info_before()
@skip_model = qw/Smart-UPS SMART-UPS unknown/;

# Throttling:  seconds during which repeat notifications are suppressed,
# keyed by priority (8 hours, 1 hour, 10 minutes, 90 seconds)
%throttle_def_of = (
    minor    => '28800',
    major    => '3600',
    severe   => '600',
    critical => '90',
);

# Severity ranking consulted by by_priority(); a lower rank sorts first.
# It is populated here, ahead of the main block, because by_priority() is
# invoked from write_file() before any statement further down the file
# would have executed.
my %severity_rank_of = (
    critical => 0,
    severe   => 1,
    major    => 2,
    minor    => 3,
);

# Grab arguments
# 'f:' is accepted because $usage advertises -f {filename}
getopts('ad:e:f:s:y:z:', \%option);
@target = @ARGV;

# Set mode
# NOTE(review): getopts() above declares no -r switch, so the 'report'
# branch cannot be reached from the command line -- confirm intent
if    ($option{r}) { $mode = 'report' }
elsif (-t STDIN)   { $mode = 'interactive' }
else               { $mode = 'batch' }



### Begin Main Program ###############################################
{
    check_args();         # Check arguments
    read_config();        # Read config file
    build_target();       # Populate @target
    target_check();       # Look for errors in @target
    basic_info();         # Gather information
    info_before();        # Gather additional information
    assign_throttle();    # Add trap-specific throttle param
    write_file();         # Produce the output file
}
##### End Main Program ###############################################



########################################################################
# Build %throttle_of
#
# Gives every trap in %priority_of the throttle interval that
# %throttle_def_of defines for the trap's priority.  Returns 1.
########################################################################
sub assign_throttle {

    # Debug trace
    trace_location('begin') if $debug;

    # Walk traps
    for my $trap (keys %priority_of) {
        $throttle_of{$trap} = $throttle_def_of{ $priority_of{$trap} };
    }

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Sort by priority
#
# sort() comparator over trap names ($a / $b).  Orders traps from most to
# least severe (critical, severe, major, minor) and, within one priority,
# alphabetically by trap name so the generated file is identical from run
# to run.  A priority missing from %severity_rank_of sorts last.
########################################################################
sub by_priority {
    my $rank_a = $severity_rank_of{ $priority_of{$a} } // 99;
    my $rank_b = $severity_rank_of{ $priority_of{$b} } // 99;

    return $rank_a <=> $rank_b || $a cmp $b;
}



########################################################################
# Gather information
#
# Records every target in %device (keyed by the address get_ipaddr()
# returns) and copies those whose model matches an entry of @skip_model
# into %ignore_device.  Returns 1.
########################################################################
sub info_before {

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Gathering additional information...');

    # Loop through targets, gathering more info
    for my $target (@target) {

        # Build name/address hash
        my $ip = get_ipaddr($target);
        $device{$ip} = $target;

        # Categorize devices
        # (%apc_device_model is not declared in this file; presumably it is
        # filled in by basic_info() -- verify)
        if ( any { $apc_device_model{$target} =~ /$_/ } @skip_model ) {
            $ignore_device{$ip} = $target;
        }

        # Entertain the operator
        print $BANG if $mode eq 'interactive';
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Sanity check
#
# Collects every target whose %manufacturer entry is not 'APC' and hands
# the list to prune_basic().  Returns 1.
#
# NOTE(review): nothing in the main block calls this sub (it calls
# target_check() instead) -- confirm whether it is still needed.
########################################################################
sub sanity_check {
    my @remove;

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Sanity check...');

    # Loop through targets, removing non-APC devices
    TARGET:
    for my $target (@target) {

        # Identify manufacturer
        if ($manufacturer{$target} ne 'APC') {
            print_it("Manufacturer of $target is not APC, ignoring");
            push @remove, $target;
            print $DOT if $mode eq 'interactive';
            next TARGET;
        }

        # Entertain operator
        print $BANG if $mode eq 'interactive';
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Remove entries which failed checks
    prune_basic(@remove);

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Build file
#
# Writes $output_file:  one ignore line per entry of %ignore_device,
# followed by one watchfor stanza per trap in %priority_of carrying the
# paging, e-mail and threshold actions for that trap.  Dies if the file
# cannot be opened, warns if it cannot be closed.  Returns 1.
########################################################################
sub write_file {

    # Text appended to the paging action, keyed by priority.  A priority
    # with no entry here ('minor') is never paged.  This table replaces a
    # given/when block:  that construct is experimental, and its warning
    # is fatal under 'use warnings FATAL => all' on newer perls.
    my %page_suffix_for = (
        critical => q{},
        severe   => q{},
        major    => ',when=1-5:7-18',
    );

    # Debug trace
    trace_location('begin') if $debug;

    # Open output file
    open my $file, '>', $output_file
        or die "Cannot open $output_file: $!";

    # Insert the ignore lines
    # NOTE(review): address and name are written into the pattern
    # unescaped and unanchored, so '.' matches any character
    print {$file} '# Skip little UPSes', "\n";
    for my $ip (sort keys %ignore_device) {
        print {$file} "ignore=/$ip|$ignore_device{$ip}/\n";
    }
    print {$file} "\n\n";

    # Walk traps, inserting each stanza as we go
    print {$file} "# Look for UPS traps\n";
    for my $trap (sort by_priority keys %priority_of) {

        # Insert watchfor regex
        #print {$file} "watchfor=/\$grab_host.*Enterprise Specific Trap \\(PowerNet-MIB::$trap\\)/\n";
        print {$file} "watchfor=/Enterprise Specific Trap \\(PowerNet-MIB::$trap\\)/\n";

        # Insert paging notification
        my $page_suffix = $page_suffix_for{ $priority_of{$trap} };
        if (defined $page_suffix) {
            for my $pager (@page_recipients) {
                print {$file} " exec=\$page_em $pager \$* --swatch$page_suffix\n";
            }
        }

        # Insert email notification
        for my $addr (@mail_recipients) {
            print {$file} " exec=\$mail_em $addr \$* --swatch\n";
        }

        # Insert throttling
        print {$file} " threshold track_by=\$4, type=limit, count=1, seconds=$throttle_of{$trap}\n";

        # Blank line
        print {$file} "\n";
    }

    # Clean up
    close $file or warn "Cannot close $output_file: $!";

    # Notify operator
    log_it("Ending $PROGRAM_NAME");
    say "Ending $PROGRAM_NAME" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Output help
########################################################################
sub HELP_MESSAGE {
    print <