#!/opt/vdops/bin/perl
# This script inflicts HTTP auth fails on all the APC management cards it
# can find and then pokes through syslog, looking for the ensuing
# accessViolationHTTP traps. The presence of such traps indicates:
# (a) that snmptrapd is running on loghost
# (b) that each of these APC devices is sending traps to loghost
# I want to know this because swatch monitors the traps from the big UPSes
# and notifies electrical-folks when it sees something interesting. Some of these
# traps indicate critical conditions. Historically, this path has broken
# with some regularity (snmptrapd wasn't running or the UPSes were
# misconfigured). This script attempts to detect that kind of error.

# V     Who      When       What
# ---------------------------------------------------------------------------
# 1.4.0 skendric 2011-02-21 Upgrade to Netops 1.4.0
# 1.3.1 skendric 2010-12-17 Futz with owner/owner_backup
# 1.3.0 skendric 2010-06-18 Use accessViolationHTTP traps instead of
#                           Warm Starts
# 1.2.0 skendric 2010-06-15 Use Warm Starts instead of auth fail traps
# 1.1.1 skendric 2010-06-13 Add @down_for_maintenance
# 1.1.0 skendric 2010-04-02 Upgrade to perl 5.10.1
# 1.0.2 skendric 2008-10-20 Accept command-line target list
# 1.0.1 skendric 2008-04-30 Don't notify operators if running interactively
# 1.0.0 skendric 2008-04-25 First Version
#
# Author: Stuart Kendrick, sbk {put at sign here} skendric {put dot here} com
#
# Source: http://www.skendric.com/syslog
#
# This software is available under the GNU GENERAL PUBLIC LICENSE, see
# http://www.fsf.org/licenses/gpl.html
#
#
# This script takes the following approach:
# -Extracts lines from syslog related to APC devices
# -Compares the originators with a list of known APC devices
# -Shrieks if it detects a mismatch
#
# Requirements:
# -PERL modules: WI::VDOPS::Utilities;
#
#
# Assumptions:
#
#
# Tested on:
# -perl-5.12.2
#
#
# Instructions:
# -Customize the script for your site: find the 'user-configurable
#  variables' section and modify as
#  appropriate
# -Type "apc-verify-trapping-alarm" to see the command-line options
# -Try it out
#
#
#
# Caveats:
#
#
# Known Bugs:
#
#
# To do:
#
#

# Begin script

# Load modules
use strict;
use warnings;
use feature 'say';
use feature 'switch';
use Data::Dumper;
use English qw( -no_match_vars );
use Getopt::Std;
use List::MoreUtils qw(any);
use LWP::UserAgent;
use Sys::Hostname;
use WI::Netops::NetopsData 1.4.0;
use WI::Netops::NetopsTools 2.2.3;
use WI::Netops::SNMPTools 1.5.3;
use WI::Netops::Utilities 1.4.4;

# Declare global variables
my %http_auth_fails;    # Hash of integers, indicating the number of
                        # accessViolationHTTP traps, keyed by host name
my $http_timeout;       # Time in seconds during which we will wait for the
                        # card to respond
my $logfile;            # Location of yesterday's syslog
my $loghost;            # Name of your loghost
my @missing;            # List of devices which did not log a warm start
                        # trap today
my $operator;           # Address I notify if I discover problems

# Define global variables
# NOTE(review): these names (and %option, @target, $mode, $debug below) are
# not declared in this file; presumably the WI::Netops modules export them.
$program_name = 'apc-verify-trapping-alarm';
$usage = 'Usage: apc-verify-trapping-alarm -s {yes|no} [-d {integer}] [-a | -e {expr} | -f {filename} | target1 target2 target3 ...]';
$version = '1.4.0';

# Define user-configurable variables

# Loghost
$loghost = hostname;

# Input file, chosen by the name of the machine we are running on.  The
# patterns are tried in order and the first match wins.  A plain if/elsif
# chain is used because given/when is experimental and deprecated.
if ($loghost =~ /gnat/) {
    $logfile = '/var/log/syslog';
}
elsif ($loghost =~ /colossus/) {
    $logfile = '/loghost/log/syslog';
}
elsif ($loghost =~ /judy/) {
    $logfile = '/var/log/syslog';
}
else {
    $logfile = '/var/log/syslog';
}

# HTTP timeout
$http_timeout = 20;

# Grab arguments
getopts('ad:e:f:rs:', \%option);
@target = @ARGV;

# Set mode
if ($option{r}) {
    $mode = 'report';
}
elsif (-t STDIN) {
    $mode = 'interactive';
}
else {
    $mode = 'batch';
}


### Begin Main Program ###############################################
{
    check_args();                  # Check arguments
    read_config();                 # Read Netops config file
    compile_mibs();                # Compile MIB files
    build_target();                # Populate @target
    target_check();                # Look for errors in @target
    basic_info();                  # Gather information
    inflict_auth_fails();          # Attempt to logon using bogus credentials
    parse_logfile();               # Find interesting messages
    look_for_missing_devices();    # Correlate devices and traps
    identify_alarms();             # Count devices with alarms
    write_alarm_log();             # Record issues
    print_report();                # Print report
    notify_staff();                # Tell staff what happened
}
##### End Main Program ###############################################


########################################################################
# Create accessViolationHTTP fails
#
# For every host in @target, POST a logon form carrying bogus credentials
# to https://<host>/Forms/login1 several times in a row, pausing two
# seconds after each attempt.  A target for which more than one POST comes
# back as an HTTP error is logged and appended to @unresponsive.
#
# Reads:    @target, $http_timeout, $mode, $debug
# Modifies: @unresponsive
# Returns:  1
########################################################################
sub inflict_auth_fails {

    # Four attempts are made although only three are needed to trigger the
    # trap, so a single missed POST is tolerated
    my $attempts_per_target = 4;
    my $tolerated_misses    = 1;

    # Debug trace
    trace_location('begin') if $debug;

    # Logon form which we will POST to the mgmt card
    my %form = (
        login_username => 'foo',
        login_password => 'foo',
        submit         => 'Log+On',
    );

    # Build LWP object
    my $ua = LWP::UserAgent->new(
        timeout           => $http_timeout,
        protocols_allowed => ['https', 'http'],
    );

    # Notify operator
    say 'Inflicting HTTP auth fails...' if $mode eq 'interactive';

    # Walk targets, inflicting auth fails as we go
    for my $target (@target) {
        my $url  = "https://$target/Forms/login1";
        my $miss = 0;

        say "Processing $target" if $debug;

        for my $attempt (1 .. $attempts_per_target) {

            # Try logging in using bogus username/password
            my $response = $ua->post($url, Content => \%form);

            # Figure out what happened
            if ($response->is_error) {
                $miss++;
                say " Missed a POST" if $debug;
            }

            # Debug info
            say $response->as_string if $debug > 3;

            # Entertain the operator
            print $DASH if $mode eq 'interactive';
            sleep 2;
        }

        # Evaluate result
        if ($miss > $tolerated_misses) {
            log_it("$target missed $miss POSTs; auth fail trap not triggered");
            push @unresponsive, $target;
        }

        # Entertain the operator
        print $BANG if $mode eq 'interactive';
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}
########################################################################
# Compare the list of devices from the hosts table with the list of
# devices which have logged warm start traps
#
# Every host in @target which has no entry in %http_auth_fails gets its
# counter in %alarm_count incremented, unless the host is listed in
# @down_for_maintenance, in which case the omission is only logged.
#
# Reads:    @target, %http_auth_fails, @down_for_maintenance, $debug
# Modifies: %alarm_count
# Returns:  1
########################################################################
sub look_for_missing_devices {

    # Debug trace
    trace_location('begin') if $debug;

    # Do the work
    TARGET:
    for my $target (@target) {

        # This device logged at least one trap, nothing to report
        next TARGET if defined $http_auth_fails{$target};

        if (any { $_ eq $target } @down_for_maintenance) {
            log_it("Did not see warm start traps for $target; skipping because it belongs to down_for_maintenance");
            next TARGET;
        }

        $alarm_count{$target}++;
    }

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Notify operator
#
# Reports which devices did not answer the HTTP logon attempts
# (@unresponsive) and which did not produce auth fail traps (@missing).
# Mail is sent to $operator only when @missing is non-empty, the script
# is not running interactively and $dome is true.
#
# NOTE(review): the visible main program calls notify_staff(), not this
# routine, and nothing in the visible code populates @missing -- confirm
# whether this sub is still in use.
#
# Reads:   @unresponsive, @missing, $mode, $dome, $operator,
#          $report_subject, $debug
# Returns: 1
########################################################################
sub notify_operator {

    # Debug trace
    trace_location('begin') if $debug;

    if (@unresponsive > 0) {
        print_it("The following devices did not respond to HTTP logon attempts");
        print_it("@unresponsive\n");
    }

    if (@missing == 0) {

        # Nothing is missing, so there is nobody to notify
        if (@unresponsive == 0) {
            print_it('I saw auth fail traps from all devices');
        }
        else {
            print_it('I saw auth fail traps from all other devices');
        }
    }
    else {
        print_it('I did not see auth fail traps from these devices:');
        print_it("@missing");

        # Notify operator
        say "\nNotifying operator..." if $mode eq 'interactive';

        if ($mode eq 'interactive' or not $dome) {

            # Interactive runs and test runs never send mail
            say 'Just kidding';
        }
        else {

            # Mail::Send is not loaded anywhere in the visible part of
            # this file, so make sure it is available before using it
            require Mail::Send;

            # Build message header
            my $msg = Mail::Send->new();
            $msg->to($operator);
            $msg->subject($report_subject);

            # Build message body
            # NOTE(review): the original here-document holding the body
            # text was lost from the source; the text below restates the
            # findings printed above -- confirm the intended wording.
            my $handle = $msg->open;
            print {$handle} "I did not see auth fail traps from these devices:\n";
            print {$handle} "@missing\n";
            if (@unresponsive > 0) {
                print {$handle} "\nThe following devices did not respond to HTTP logon attempts\n";
                print {$handle} "@unresponsive\n";
            }
            $handle->close;
        }
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Make things look pretty
    log_it("Ending $PROGRAM_NAME");
    say "Ending $PROGRAM_NAME" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Parse logfile
#
# Waits twice $long seconds so the agents have time to log their traps,
# then scans $logfile for accessViolationHTTP traps written by snmptrapd
# and counts them per originating host in %http_auth_fails.
#
# Reads:    $logfile, $long, $mode, $debug
# Modifies: %http_auth_fails
# Returns:  1; dies if $logfile cannot be opened
########################################################################
sub parse_logfile {

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    say 'Parsing logfile...' if $mode eq 'interactive';

    # Sleep a bit, to allow the agents to reboot and log traps
    sleep $long;
    sleep $long;

    # Open logfile
    open my $log, '<', $logfile or die "Cannot open $logfile: $!";

    LINE:
    while (my $line = <$log>) {

        # Skip unless this line was written by snmptrapd and contains an APC trap
        next LINE unless $line =~ /snmptrapd/;
        next LINE unless $line =~ /PowerNet-MIB/;

        # Skip unless the line is an accessViolationHTTP trap
        next LINE unless $line =~ /accessViolationHTTP/;

        # Skip firewall lines
        next LINE if $line =~/PIX/;

        # Skip sendmail lines
        next LINE if $line =~/sendmail/;

        # Skip network management lines
        next LINE if $line =~ /nodewatch|tocops|apager/;
        next LINE if $line =~ /velmamgmt|daphnemgmt/;

        # Grab host: the first field following the date and time stamp
        my ($host) = ($line =~ /snmptrapd.*?\d+-\d+-\d+\s+\d\d:\d\d:\d\d\s+(.*?)\s+/);

        # Bail if we didn't find host
        unless (defined $host and $host ne $EMPTY_STR) {
            say 'Could not find host on line:';
            say "$line\n";
            next LINE;
        }

        # Count the results
        $http_auth_fails{$host}++;

        # Entertain the operator
        print $BANG if $mode eq 'interactive';
    }

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Clean-up
    close $log or warn "Cannot close $logfile: $!";

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Print report
########################################################################
sub print_report {
    my $handle;
    my $total = @target;
    my $now = get_now();

    # If we are running in test mode, skip this routine
    unless ($dome) {
        print_it("Running in test mode, cannot print a meaningful report\n");
        return 1;
    }

    # Debug trace
    trace_location('begin') if $debug;

    # Direct output to screen or to file
    if ($mode eq 'interactive') {
        $handle = *STDOUT;
    }
    else {
        open $handle, '>', $report_file or die "Cannot open $report_file: $!\n";
    }

    print {$handle} <