#!/opt/vdops/bin/perl
# This script queries Tipping Point IPS boxes for hardware alarms and
# produces a report

# V      Who       When        What
# ---------------------------------------------------------------------------
# 1.2.0  skendric  2011-02-21  Upgrade to Netops 1.4.0
# 1.1.2  skendric  2010-12-30  Add @insane to report
# 1.1.1  skendric  2010-12-17  Futz with owner/owner_backup
# 1.1.0  skendric  2010-02-07  Upgrade to perl 5.10.1
# 1.0.3  skendric  2009-05-21  Support SNMP.pm
# 1.0.2  skendric  2009-04-19  Distinguish between silent and unresponsive
# 1.0.1  skendric  2009-03-20  Add @down_for_maintenance
# 1.0.0  skendric  2007-12-07  First Version
#
#
# Author:  Stuart Kendrick, sbk {put at sign here} skendric {put dot here} com
#
# Source:  http://www.skendric.com/device
#
# This software is available under the GNU GENERAL PUBLIC LICENSE, see
# http://www.fsf.org/licenses/gpl.html
#
#
# This script takes the following approach:
#  -Parses the hosts table for a list of targets (or accepts a command-
#   line list)
#  -Queries a bunch of HOST-RESOURCES and NOKIA-IPSO-SYSTEM variables
#  -Produces a report
#
#
# Requirements:
#  -The target(s) must be pingable
#
#  -The following MIB modules stashed in /opt/vdops/share/snmp/mibs,
#   or wherever it is that you store MIB modules:
#   NOKIA-IPSO-SYSTEM-MIB.txt
#
#  -PERL modules:  the WI::Netops collection
#
# NOTE(review): the two NOKIA-IPSO-SYSTEM mentions above look inherited
# from a sibling script; the routines below query Tipping Point objects
# (chasRunState, fanRunState, highAvailEnabled, ...).  Confirm which MIB
# modules are actually required and update this header.
#
#
# Assumptions:
#
#
# Tested on:
#  -perl-5.12.2
#  -net-snmp-5.6
#
#
# Instructions:
#  -Customize the script for your site:  find the 'user-configurable
#   variables' section and modify as appropriate
#  -Try it out
#
#
# Caveats:
#
#
# Known Bugs:
#
#
# To do:
#  -Add support for SNMPv3
#

# Begin script

# Load modules
use strict;
use warnings;
use feature 'say';
use feature 'switch';
use Carp qw(carp cluck croak confess);
use Data::Dumper;
use English qw( -no_match_vars );
use Getopt::Std;
use List::MoreUtils qw(any);
use WI::Netops::HostTools 1.0.4;
use WI::Netops::NetopsTools 2.2.3;
use WI::Netops::NetopsData 1.4.0;
use WI::Netops::PingTools 1.1.7;
use WI::Netops::SNMPTools 1.5.3;
use WI::Netops::Utilities 1.4.4;

# Declare global variables
my %chassis_state;       # Chassis
my $cpu_thresh;          # Threshold of CPU utilization above which
                         # we shriek
my %cpu_util;            # CPU utilization
my %fan_state;           # Fans
my %ha_state;            # High availability
my $memory_thresh;       # Threshold of memory utilization above which
                         # we shriek
my %memory_util;         # Memory utilization
my %pem_state;           # Another chassis
my %ps_state;            # Power supplies
my @single_ps_models;    # TP models which contain a single power supply
my %slot_state;          # Slots
my %temperature;         # Temperature in F
my $temperature_thresh;  # Temperature above which we shriek

# Define global variables

# Debug stuff
$program_name = 'tippingpoint-alarm';
$usage = 'Usage: tippingpoint-alarm -s {yes|no} [-d {integer}] [-r] [-a | -e {expr} | -f {filename} | target1 target2 target3 ...]';
$version = '1.2.0';

# Thresholds
$cpu_thresh         = 75;
$memory_thresh      = 80;
$temperature_thresh = 158;

# Tipping Point specifics
@single_ps_models = qw/tpt-model-50 tpt-model-200/;

# Grab arguments
getopts('ad:e:f:rs:', \%option);
@target = @ARGV;

# Set mode
if    ($option{r}) { $mode = 'report' }
elsif (-t STDIN)   { $mode = 'interactive' }
else               { $mode = 'batch' }


### Begin Main Program ###############################################
{
    check_args();         # Check arguments
    read_config();        # Read Netops config file
    compile_mibs();       # Compile MIB files
    build_target();       # Populate @target
    target_check();       # Look for errors in @target
    basic_info();         # Gather information
    sanity_check();       # Check for error conditions
    do_the_work();        # Do it
    identify_alarms();    # Count devices with alarms
    write_alarm_log();    # Record issues
    print_report();       # Print report
    notify_staff();       # Mail report
}
##### End Main Program #################################################


########################################################################
# Query variables
#
# Walks @target and fills the per-host state/utilization hashes by
# calling each acquire_* routine.  When $dome is false nothing is
# queried; the routine just sleeps $short seconds.  Always returns 1.
########################################################################
sub do_the_work {

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Querying targets...');
    unless ($dome) {
        sleep $short;
        return 1;
    }

    # Loop through the list of targets
    for my $target (@target) {
        say "Processing $target" if $debug;

        # Determine state of hardware components
        $chassis_state{$target} = acquire_chassis_state($target);
        $ha_state{$target}      = acquire_ha_state($target);
        $fan_state{$target}     = acquire_fan_state($target);
        $ps_state{$target}      = acquire_ps_state($target);
        $slot_state{$target}    = acquire_slot_state($target);

        # Identify resource consumption
        $cpu_util{$target}    = acquire_cpu_consumption($target);
        $memory_util{$target} = acquire_memory_consumption($target);
        $temperature{$target} = acquire_temperature($target);

        # Entertain operator
        print $BANG if $mode eq 'interactive';
    }

    # Debug info
    if ($debug > 2) {
        for my $target (@target) {
            if (defined $alarm_count{$target} and $alarm_count{$target} > 0) {
                say "alarm_count{$target} = $alarm_count{$target}";
            }
        }
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}


########################################################################
# Acquire chassis state
#
# Takes a host name.  Reads chasRunState.0; when that is 'act' the
# answer is refined with acquire_pem_state().  Bumps $alarm_count{host}
# for anything other than 'act' and returns the resulting state.
########################################################################
sub acquire_chassis_state {
    my $host = shift;

    # Debug trace
    trace_location('begin') if $debug;

    # What does the chassis say about itself?  Treat "no answer" like the
    # sibling routines do, so the comparisons below never see undef
    my $state = snmpGet( {host => $host, oid => 'chasRunState.0'} );
    $state //= $QUERY;
    say " chasRunState.0 = $state" if $debug;

    # If chasRunState looks good, consult pemRunState
    $state = acquire_pem_state($host) if $state eq 'act';

    # Increment counter
    $alarm_count{$host}++ unless $state eq 'act';

    # Debug trace
    trace_location('end') if $debug;

    return $state;
}


########################################################################
# Acquire CPU utilization
#
# Takes a host name.  Reads resourceHPCPUBusyPercent.0 and returns it,
# or $QUERY when the query yields nothing.  Bumps $alarm_count{host}
# when there is no answer or the value exceeds $cpu_thresh.
########################################################################
sub acquire_cpu_consumption {
    my $host = shift;

    # Debug trace
    trace_location('begin') if $debug;

    # How busy is the CPU?
    my $util = snmpGet( {host => $host, oid => 'resourceHPCPUBusyPercent.0'} );
    $util //= $QUERY;
    say " CPU utilization = $util" if $debug;

    # Increment counter; the 'eq' test comes first so that $QUERY never
    # reaches the numeric comparison
    $alarm_count{$host}++ if ($util eq $QUERY or $util > $cpu_thresh);

    # Debug trace
    trace_location('end') if $debug;

    return $util;
}


########################################################################
# Acquire fan state
#
# Takes a host name.  Walks fanRunState and returns the first value that
# is not 'act', else the last value seen, else $QUERY when the walk
# produced nothing usable.  Bumps $alarm_count{host} unless 'act'.
########################################################################
sub acquire_fan_state {
    my $host = shift;
    my $state;

    # Debug trace
    trace_location('begin') if $debug;

    # Get fan state
    my $val = snmpWalk( {host => $host, max_rep => 10, oid => 'fanRunState'} );

    # Are all the fans happy?
    FAN:
    for my $varbind (@{ $val // [] }) {
        $state = $varbind->{val};
        last FAN unless (defined $state and $state eq 'act');
    }
    $state = $QUERY unless (defined $state and $state ne $EMPTY_STR);

    # Increment counter
    $alarm_count{$host}++ unless $state eq 'act';

    # Debug trace
    trace_location('end') if $debug;

    return $state;
}


########################################################################
# Acquire high availability status
#
# Takes a host name.  Runs three checks in order, stopping at the first
# one that does not roll up to 'up':
#   highAvailEnabled.0         enabled -> up, disabled   -> down
#   highAvailFaultState.0      normal  -> up, fallback   -> down
#   highAvailZeroPowerState.0  normal  -> up, ips-bypass -> down
# Any other (or missing) value rolls up to $QUERY.  Bumps
# $alarm_count{host} unless the final state is 'up'; returns that state.
########################################################################
sub acquire_ha_state {
    my $host = shift;

    # One row per check: OID, debug label, raw value => rolled-up state.
    # A lookup table stands in for given/when, which is experimental and
    # warns on perl 5.18 and later
    my @checks = (
        [ 'highAvailEnabled.0',
          'high availability is',
          { 'disabled' => 'down', 'enabled' => 'up' } ],
        [ 'highAvailFaultState.0',
          'HA fault state is',
          { 'normal' => 'up', 'fallback' => 'down' } ],
        [ 'highAvailZeroPowerState.0',
          'HA zero power state is',
          { 'normal' => 'up', 'ips-bypass' => 'down' } ],
    );

    # Roll-up variable capturing the worst state of the HA variables
    my $state = $QUERY;

    # Debug trace
    trace_location('begin') if $debug;

    CHECK:
    for my $check (@checks) {
        my ($oid, $label, $state_for) = @{$check};

        my $value = snmpGet( {host => $host, oid => $oid} );
        $value //= $QUERY;

        # Unrecognized answers are as suspicious as no answer at all
        $state = $state_for->{$value} // $QUERY;
        say " $host $label $value" if $debug > 3;

        last CHECK unless $state eq 'up';
    }

    # Increment counter
    $alarm_count{$host}++ unless $state eq 'up';

    # Debug trace
    trace_location('end') if $debug;

    return $state;
}


########################################################################
# Acquire Memory utilization
#
# Takes a host name.  Reads resourceHPMemoryInUsePercent.0 and returns
# it, or $QUERY when the query yields nothing.  Bumps $alarm_count{host}
# when there is no answer or the value exceeds $memory_thresh.
########################################################################
sub acquire_memory_consumption {
    my $host = shift;

    # Debug trace
    trace_location('begin') if $debug;

    # What percentage of memory are we using?
    say 'Getting resourceHPMemoryInUsePercent.0' if $debug > 3;
    my $util = snmpGet( {host => $host, oid => 'resourceHPMemoryInUsePercent.0'} );
    $util //= $QUERY;
    say " memory utilization = $util" if $debug;

    # Increment counter; 'eq' first keeps $QUERY out of the numeric test
    $alarm_count{$host}++ if ($util eq $QUERY or $util > $memory_thresh);

    # Debug trace
    trace_location('end') if $debug;

    return $util;
}


########################################################################
# Acquire pem state
#
# Takes a host name.  Walks pemRunState and returns the first value that
# is not 'act', else the last value seen, else $QUERY.  Does not touch
# $alarm_count itself; acquire_chassis_state() counts on its behalf.
########################################################################
sub acquire_pem_state {
    my $host = shift;
    my $state;

    # Debug trace
    trace_location('begin') if $debug;

    # Get hardware state
    my $val = snmpWalk( {host => $host, max_rep => 10, oid => 'pemRunState'} );

    # Is the high-level hardware state happy?
    PEM:
    for my $varbind (@{ $val // [] }) {
        $state = $varbind->{val};
        last PEM unless (defined $state and $state eq 'act');
    }
    $state //= $QUERY;
    say " pemRunState = $state" if $debug;

    # Debug trace
    trace_location('end') if $debug;

    return $state;
}


########################################################################
# Acquire ps state
#
# Takes a host name.  Walks psRunState; when every supply reports 'act'
# and the host's $sysObjectID entry is not listed in @single_ps_models,
# also reads resourcePowerSupplyStatus.0 and adopts its value unless it
# is 'green'.  Bumps $alarm_count{host} unless the result is 'act'.
########################################################################
sub acquire_ps_state {
    my $host = shift;
    my $state;

    # Debug trace
    trace_location('begin') if $debug;

    # Get power supply state
    say 'Walk psRunState' if $debug > 3;
    my $val = snmpWalk( {host => $host, max_rep => 10, oid => 'psRunState'} );

    # Are all the power supplies happy?
    PS:
    for my $varbind (@{ $val // [] }) {
        $state = $varbind->{val};
        last PS unless (defined $state and $state eq 'act');
    }
    $state //= $QUERY;
    say " psRunState eq $state" if $debug;

    # Acquire global power supply status
    if ($state eq 'act') {
        my $model = $sysObjectID{$host} // $EMPTY_STR;
        unless (any { $_ eq $model } @single_ps_models) {
            say 'Get resourcePowerSupplyStatus.0' if $debug > 3;
            my $global =
              snmpGet({host => $host, oid => 'resourcePowerSupplyStatus.0'});

            # No answer must not leak out as undef; report it as $QUERY
            $global //= $QUERY;
            $state = $global unless $global eq 'green';
            say " resourcePowerSupplyStatus.0 eq $state" if $debug;
        }
    }

    # Increment counter
    $alarm_count{$host}++ unless $state eq 'act';

    # Debug trace
    trace_location('end') if $debug;

    return $state;
}


########################################################################
# Acquire slot state
#
# Takes a host name.  Walks slotRunState and stops at the first value
# that is neither 'act' nor 'oos'.  Returns the value it stopped on,
# else the last value seen, else $QUERY.  Bumps $alarm_count{host}
# unless the result is 'act'.
#
# NOTE(review): 'oos' is tolerated while scanning but still raises an
# alarm when it happens to be the last slot walked, so the outcome
# depends on slot order.  Confirm whether 'oos' should alarm and make
# the scan and the counter agree.
########################################################################
sub acquire_slot_state {
    my $host = shift;
    my $state;

    # Debug trace
    trace_location('begin') if $debug;

    # Get slot state
    my $val = snmpWalk( {host => $host, max_rep => 10, oid => 'slotRunState'} );

    # Are the slots happy?
    SLOT:
    for my $varbind (@{ $val // [] }) {
        $state = $varbind->{val};
        last SLOT unless defined $state;
        last SLOT unless ($state eq 'act' or $state eq 'oos');
    }
    $state //= $QUERY;

    # Increment counter
    $alarm_count{$host}++ unless $state eq 'act';

    # Debug trace
    trace_location('end') if $debug;

    return $state;
}


########################################################################
# Acquire temperature
#
# Takes a host name.  Reads resourceChassisTempDegreesC.0, converts the
# answer with convert_celsius_to_fahrenheit() and returns it, or $QUERY
# when the query yields nothing.  Bumps $alarm_count{host} when there is
# no answer or the converted value exceeds $temperature_thresh.
########################################################################
sub acquire_temperature {
    my $host = shift;

    # Debug trace
    trace_location('begin') if $debug;

    # How hot are we?
    my $temp = snmpGet( {host => $host, oid => 'resourceChassisTempDegreesC.0'} );
    $temp //= $QUERY;
    $temp = convert_celsius_to_fahrenheit($temp) unless $temp eq $QUERY;
    say " temperature = $temp" if $debug;

    # Increment counter; 'eq' first keeps $QUERY out of the numeric test
    $alarm_count{$host}++ if ($temp eq $QUERY or $temp > $temperature_thresh);

    # Debug trace
    trace_location('end') if $debug;

    return $temp;
}


########################################################################
# Tell the operator what I discovered
########################################################################
sub print_report {
    my $handle;
    my $total = @target;
    my $now   = get_now();

    # If we are running in test mode, skip this routine
    unless ($dome) {
        print_it("Running in test mode, cannot print a meaningful report\n");
        return 1;
    }

    # Debug trace
    trace_location('begin') if $debug;

    # Direct output to screen or to file
    if ($mode eq 'interactive') {
        $handle = *STDOUT;
    }
    else {
        open $handle, '>', $report_file
          or die "Cannot open $report_file: $!\n";
    }

    print {$handle} <