#!/opt/vdops/bin/perl

# This script queries devices for their interfaces, picks out the ones with
# non-empty ifAlias values, checks for ifOperStatus and errors, and whines
# if it sees something it doesn't like

# V     Who      When        What
# ---------------------------------------------------------------------------
# 2.1.0 skendric 2011-02-21  Upgrade to Netops 1.4.0
# 2.0.5 skendric 2010-12-17  Futz with owner/owner_backup
# 2.0.4 skendric 2010-11-18  Apply shrink_if_descr to if_name
# 2.0.3 skendric 2010-06-24  Insert 0 percent for link down interfaces
# 2.0.2 skendric 2010-06-15  Fix ifIn/OutDiscard reporting error
# 2.0.1 skendric 2010-05-21  Simplify skipping interfaces
# 2.0.0 skendric 2010-05-07  Use percentages instead of absolute thresholds
# 1.3.0 skendric 2010-04-22  Upgrade to perl 5.10.1
# 1.2.0 skendric 2009-12-20  Command-line ignore discards or errors
# 1.1.1 skendric 2009-10-26  Skip interfaces with 'ad hoc' or 'shutdown' in
#                            ifAlias
# 1.1.0 skendric 2009-10-07  Ignore ifIn/OutDiscards for now
# 1.0.1 skendric 2009-03-20  Add @down_for_maintenance
# 1.0.0 skendric 2009-02-24  First Version
#
#
# Author: Stuart Kendrick, sbk {put at sign here} skendric {put dot here} com
#
# Source: http://www.skendric.com/device
#
# This software is available under the GNU GENERAL PUBLIC LICENSE, see
# http://www.fsf.org/licenses/gpl.html
#
#
# This script takes the following approach:
#  -Parses the hosts table for a list of targets (or accepts a command-
#   line list)
#  -Scans ifAlias for non-empty strings, considering the presence of a
#   character sufficient reason to declare the port an 'uplink'
#  -Produces a report highlighting ifOperStatus, ifInErrors, and
#   ifOutErrors
#  -Sends e-mail if it sees a problem
#
#
# Requirements:
#  -The target(s) must be pingable
#
#  -The following MIB modules stashed in /opt/vdops/share/snmp/mibs,
#   or wherever it is that you store MIB modules:
#     CISCO-PRODUCTS-MIB.my
#
#  -PERL modules: the WI::Netops collection
#
#
# Assumptions:
#
#
# Tested on:
#  -perl-5.12.2
#  -net-snmp-5.6
#
#
# Instructions:
#  -Customize the script for your site: find the 'user-configurable
#   variables' section and modify as appropriate
#  -Try it out
#
#
# Caveats:
#  -This script assumes that the Supervisor card is in slot 1, that
#   the Sup card contains two uplink ports, and that both uplink
#   ports are in use
#
#
# Known Bugs:
#
#
# To do:
#  -Add support for SNMPv3
#

# Begin script

# Load modules
use strict;
use warnings;
use feature 'say';
use feature 'switch';
use Carp qw(carp cluck croak confess);
use Data::Dumper;
use English qw( -no_match_vars );
use Getopt::Std;
use List::MoreUtils qw(any);
use Regexp::Common;
use WI::Netops::CiscoTools 1.4.3;
use WI::Netops::HostTools 1.0.4;
use WI::Netops::IFTools 1.3.1;
use WI::Netops::NetopsTools 2.2.3;
use WI::Netops::NetopsData 1.4.0;
use WI::Netops::PingTools 1.1.7;
use WI::Netops::SNMPTools 1.5.3;
use WI::Netops::Utilities 1.4.4;

# Declare global variables.  Hashes are keyed by target, pointing to
# hash refs which are keyed by ifIndex
my $discard_thresh;            # User-specified discard threshold.  It is
                               # compared directly against the ratio
                               # (discards) / (total packets) computed in
                               # do_the_work, so .05 flags an interface
                               # discarding 5% of its packets
my $error_thresh;              # User-specified error threshold, compared
                               # against (error packets) / (total packets)
                               # in the same way
my %if_alias;
my %if_name;
my %if_admin_status;
my %if_oper_status;
my %if_in_discards;
my %if_out_discards;
my %if_in_discards_pct;
my %if_out_discards_pct;
my %if_in_errors;
my %if_out_errors;
my %if_in_errors_pct;
my %if_out_errors_pct;
my %if_in_pkts;
my %if_out_pkts;
my %if_hc_in_ucast_pkts;
my %if_hc_out_ucast_pkts;
my %if_hc_in_broadcast_pkts;
my %if_hc_out_broadcast_pkts;
my %if_hc_in_multicast_pkts;
my %if_hc_out_multicast_pkts;
my $ignore_discards;           # Boolean telling us whether or not to ignore
                               # ifIn/OutDiscards
my $ignore_errors;             # Boolean telling us whether or not to ignore
                               # ifIn/OutErrors
my $log_percent;               # Boolean telling us whether or not to log
                               # error/discard percentages per interface
my %reason_for_alarm;          # Hash (keyed by target) of hash refs (keyed
                               # by ifIndex) holding a string identifying
                               # the cause of the alarm
my @skip_aliases;              # Strings in ifAlias which will induce us to
                               # ignore the associated interface
my %skip_interface;            # Hash of switches keyed by switchName-interface
                               # These are interfaces with known issues which
                               # we want to ignore, i.e. which we know about
                               # and which we do not want triggering e-mail
my %uplinks;                   # Hash (keyed by target) of hash refs (keyed by
                               # ifIndex) of ifAlias such that ifAlias is
                               # non-empty
my $volume_thresh;             # User-specified number of frames below which
                               # we automatically ignore the interface.  The
                               # thinking here is that if the interface has
                               # seen some small number of frames, say '10',
                               # and two of those were errors, then normally
                               # this interface would show up on the report,
                               # as having a 20% error rate.  But really,
                               # unless we've seen enough traffic, the error
                               # (or discard) percentage doesn't point us
                               # toward a problem.

# Define global variables
# The usage line lists every switch handed to getopts() below, including the
# threshold overrides -b (errors), -t (discards) and -v (volume).
$program_name = 'named-if-alarm';
$usage        = 'Usage: named-if-alarm -s {yes|no} [-x] [-y] [-b {num}] [-t {num}] [-v {num}] [-d {integer}] [-r] [-a | -e {expr} | -f {filename} | target1 target2 target3 ...]';
$version      = '2.1.0';

# Logging
$log_percent = 0;

# Skip interfaces whose ifAlias contains the following strings
@skip_aliases = qw/Creeper down DOWN Flam Flim future hoc kw sniffer Tiki/;

# Skip specific target / interface combinations
%skip_interface = (
    "gbsr-a-esx GigabitEthernet2/2" => 1,
    "gbsr-b-esx GigabitEthernet2/2" => 1,
);

# Thresholds (ratios of problem packets to total packets; .05 means 5%)
$discard_thresh = .05;
$error_thresh   = .01;
$volume_thresh  = 10000;

# Ignore flags
$ignore_discards = 0;
$ignore_errors   = 0;

# Grab arguments
getopts('ab:d:e:f:rs:t:v:xy', \%option);
@target = @ARGV;
$error_thresh   = $option{b} if defined $option{b};
$discard_thresh = $option{t} if defined $option{t};
$volume_thresh  = $option{v} if defined $option{v};

# Reject threshold overrides that are not real numbers
unless ($RE{num}{real}->matches($error_thresh)) {
    die "-b {num} must be a number\n" ;
}
unless
($RE{num}{real}->matches($discard_thresh)) {
    die "-t {num} must be a number\n" ;
}
unless ($RE{num}{real}->matches($volume_thresh)) {
    die "-v {num} must be a number\n" ;
}
$ignore_discards = 1 if defined $option{x};
$ignore_errors   = 1 if defined $option{y};

# Set mode: -r forces 'report'; otherwise a terminal on STDIN means
# 'interactive' and anything else means 'batch'
if    ($option{r}) { $mode = 'report' }
elsif (-t STDIN)   { $mode = 'interactive' }
else               { $mode = 'batch' }


### Begin Main Program ###############################################
{
    check_args();          # Check arguments
    read_config();         # Read Netops config file
    compile_mibs();        # Compile MIB files
    build_target();        # Populate @target
    target_check();        # Look for errors in @target
    basic_info();          # Gather information
    info_before();         # Gather more information
    do_the_work();         # Do it
    identify_alarms();     # Figure out what is broken
    write_alarm_log();     # Record issues
    print_report();        # Print report
    notify_staff();        # Mail report
}
##### End Main Program #################################################


########################################################################
# Query variables
#
# Walks every named interface collected by info_before() and decides
# whether it is in alarm.  For each target / ifIndex pair it:
#   -skips interfaces that are administratively down, listed in
#    %skip_interface, or whose ifAlias matches an entry in @skip_aliases
#   -raises a 'link down' alarm when ifOperStatus is 'down'
#   -computes error and discard ratios per direction and stores them in
#    %if_{in,out}_{errors,discards}_pct
#   -raises an alarm when a ratio reaches its threshold and the direction
#    has carried more than $volume_thresh packets
#
# Reads:   @target, %uplinks, the %if_* counter hashes, the thresholds
# Writes:  %alarm_count, %reason_for_alarm, the %if_*_pct hashes
# Returns: 1
#
# Note that %reason_for_alarm holds one string per interface, so a later
# check overwrites the reason recorded by an earlier one, while
# %alarm_count is incremented once per failed check.
#
# NOTE(review): $ignore_discards and $ignore_errors are not consulted in
# this routine; presumably identify_alarms() applies them -- confirm.
########################################################################

sub do_the_work {

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    say 'Analyzing interface counters...' if $mode eq 'interactive';

    # Loop through the list of targets
    for my $target (@target) {
        say "Processing $target" if $debug;

        # Analyze interfaces.  ifIndex values are integers, so sort them
        # numerically; a plain string sort would visit 10 before 2.
        INTERFACE:
        for my $index (sort { $a <=> $b } keys %{$uplinks{$target}}) {
            say "Doing ifIndex $index" if $debug > 1;

            # Extract name and status
            my $if_alias        = $if_alias{$target}->{$index};
            my $if_name         = $if_name{$target}->{$index};
            my $if_admin_status = $if_admin_status{$target}->{$index};
            my $if_oper_status  = $if_oper_status{$target}->{$index};
            say " ifAlias = $if_alias" if $debug > 2;

            # Skip interfaces which are administratively down
            if ($if_admin_status eq 'down') {
                log_it("$if_name ($if_alias) on $target is administratively down, skipping");
                say "$if_name ($if_alias) is administratively down, skipping" if $debug;
                next INTERFACE;
            }

            # Skip interfaces belonging to the skip list
            if (defined $skip_interface{"$target $if_name"}) {
                log_it("$if_name ($if_alias) on $target belongs to ignore list, skipping");
                say "$if_name ($if_alias) belongs to ignore list, skipping" if $debug;
                next INTERFACE;
            }

            # Skip interfaces containing the skip strings
            if (any { $if_alias =~ /$_/} @skip_aliases ) {
                log_it("$if_name ($if_alias) belongs to a ignore list, skipping");
                say "$if_name ($if_alias) belongs to ignore list, skipping" if $debug;
                next INTERFACE;
            }

            # Whine about interfaces with link down.  Processing continues
            # so that the percentage hashes still get an entry for this
            # interface.
            if ($if_oper_status eq 'down') {
                $alarm_count{$target}++;
                $reason_for_alarm{$target}->{$index} = 'link down';
                log_it("$target:$if_name ($if_alias) is down");
                say " $if_alias ($if_name) is down" if $debug;
            }

            # Extract counters
            my $if_in_errors    = $if_in_errors{$target}->{$index};
            my $if_out_errors   = $if_out_errors{$target}->{$index};
            my $if_in_discards  = $if_in_discards{$target}->{$index};
            my $if_out_discards = $if_out_discards{$target}->{$index};

            # Do a little arithmetic: total packets per direction
            my $if_in_pkts = $if_hc_in_ucast_pkts{$target}->{$index}
                           + $if_hc_in_broadcast_pkts{$target}->{$index}
                           + $if_hc_in_multicast_pkts{$target}->{$index};
            my $if_out_pkts = $if_hc_out_ucast_pkts{$target}->{$index}
                            + $if_hc_out_broadcast_pkts{$target}->{$index}
                            + $if_hc_out_multicast_pkts{$target}->{$index};

            # If the interface hasn't seen traffic in either direction,
            # record zeros and skip it
            if ($if_in_pkts == 0 and $if_out_pkts == 0) {
                $if_in_errors_pct{$target}->{$index}    = 0;
                $if_out_errors_pct{$target}->{$index}   = 0;
                $if_in_discards_pct{$target}->{$index}  = 0;
                $if_out_discards_pct{$target}->{$index} = 0;
                next INTERFACE;
            }

            # Calculate ratios one direction at a time.  A direction with
            # no packets keeps a ratio of 0 (which also avoids dividing by
            # zero) but no longer hides problems in the other direction.
            my ($if_in_errors_pct,  $if_in_discards_pct)  = (0, 0);
            my ($if_out_errors_pct, $if_out_discards_pct) = (0, 0);
            if ($if_in_pkts > 0) {
                $if_in_errors_pct   = sprintf "%2.3f", ($if_in_errors   / $if_in_pkts);
                $if_in_discards_pct = sprintf "%2.3f", ($if_in_discards / $if_in_pkts);
            }
            if ($if_out_pkts > 0) {
                $if_out_errors_pct   = sprintf "%2.3f", ($if_out_errors   / $if_out_pkts);
                $if_out_discards_pct = sprintf "%2.3f", ($if_out_discards / $if_out_pkts);
            }
            my $if_problems = $if_in_errors_pct  + $if_out_errors_pct
                            + $if_in_discards_pct + $if_out_discards_pct;

            # Log percentages if asked
            if ($log_percent and $if_problems > 0) {
                log_it("For $target:$if_name ($if_alias), if_in_errors_pct = $if_in_errors_pct, if_out_errors_pct = $if_out_errors_pct, if_in_discards_pct = $if_in_discards_pct, if_out_discards_pct = $if_out_discards_pct");
            }

            # Save percentages
            $if_in_errors_pct{$target}->{$index}    = $if_in_errors_pct;
            $if_out_errors_pct{$target}->{$index}   = $if_out_errors_pct;
            $if_in_discards_pct{$target}->{$index}  = $if_in_discards_pct;
            $if_out_discards_pct{$target}->{$index} = $if_out_discards_pct;

            # Compare thresholds.  A direction is only judged once it has
            # carried traffic and more than $volume_thresh packets.

            # In Packets
            if ($if_in_pkts > 0 and $if_in_pkts > $volume_thresh) {
                if ($if_in_discards_pct >= $discard_thresh) {
                    log_it("For $target, $if_name ($if_alias) in alarm because inDiscards at $if_in_discards_pct");
                    $reason_for_alarm{$target}->{$index} = 'inDiscards';
                    $alarm_count{$target}++;
                }
                if ($if_in_errors_pct >= $error_thresh) {
                    log_it("For $target, $if_name ($if_alias) in alarm because inErrors at $if_in_errors_pct");
                    $reason_for_alarm{$target}->{$index} = 'inErrors';
                    $alarm_count{$target}++;
                }
            }

            # Out Packets
            if ($if_out_pkts > 0 and $if_out_pkts > $volume_thresh) {
                if ($if_out_discards_pct >= $discard_thresh) {
                    log_it("For $target, $if_name ($if_alias) in alarm because outDiscards at $if_out_discards_pct");
                    $reason_for_alarm{$target}->{$index} = 'outDiscards';
                    $alarm_count{$target}++;
                }
                if ($if_out_errors_pct >= $error_thresh) {
                    log_it("For $target, $if_name ($if_alias) in alarm because outErrors at $if_out_errors_pct");
                    $reason_for_alarm{$target}->{$index} = 'outErrors';
                    $alarm_count{$target}++;
                }
            }

        }   # End 'Analyze interfaces'

        # Entertain the operator
        print $BANG if ($mode eq 'interactive' and not $debug);

    }   # End 'Loop through list of targets'

    # Make things look pretty
    say "\n" if $mode eq 'interactive';

    # Debug trace
    trace_location('end') if $debug;

    return 1;
}



########################################################################
# Gather more information
########################################################################

sub info_before {

    # Debug trace
    trace_location('begin') if $debug;

    # Notify operator
    print_it('Gathering interface counters...');

    # Loop through targets
    for my $target (@target) {
        say "Processing $target" if $debug;
        $uplinks{$target} = find_named_ports($target);

        # Acquire named interface attributes, visiting ifIndex values in
        # numeric order
        INTERFACE:
        for my $index (sort { $a <=> $b } keys %{$uplinks{$target}}) {
            my ($if_alias, $if_name);
            my ($if_admin_status, $if_oper_status);
            my ($if_in_discards, $if_out_discards);
            my
($if_in_errors, $if_out_errors);
            my ($if_hc_in_ucast_pkts, $if_hc_out_ucast_pkts);
            my ($if_hc_in_multicast_pkts, $if_hc_out_multicast_pkts);
            my ($if_hc_in_broadcast_pkts, $if_hc_out_broadcast_pkts);

            # Query "<object>.<ifIndex>" on the current target, fall back to
            # $fallback when snmpGet hands back undef, and echo the result
            # at debug level 3 and above.
            my $poll = sub {
                my ($object, $fallback) = @_;
                my $value = snmpGet( {host => $target, oid => "$object.$index"} );
                $value //= $fallback;
                say " $object = $value" if $debug > 2;
                return $value;
            };

            say "Doing ifIndex $index" if $debug > 1;

            # ifAlias comes straight from the named-port table
            $if_alias = $uplinks{$target}->{$index};
            say " ifAlias = $if_alias" if $debug > 2;

            # ifName
            # Why doesn't IF-MIB::ifName.$index work here?  [Must specify
            # the fully-qualified numeric OID, on account of the ifName
            # object in Brcm-adapterInfo-MIB, which loads after IF-MIB]
            say "Getting ifName.$index" if $debug > 3;
            $if_name = shrink_if_descr(
                snmpGet({host => $target, oid => ".1.3.6.1.2.1.31.1.1.1.1.$index"})
                    // $QUERY
            );
            say " ifName = $if_name" if $debug > 2;

            # Status objects default to $QUERY when the agent gives nothing
            $if_admin_status = $poll->('ifAdminStatus', $QUERY);
            $if_oper_status  = $poll->('ifOperStatus',  $QUERY);

            # Discard and error counters default to 0
            $if_in_discards  = $poll->('ifInDiscards',  0);
            $if_out_discards = $poll->('ifOutDiscards', 0);
            $if_in_errors    = $poll->('ifInErrors',    0);
            $if_out_errors   = $poll->('ifOutErrors',   0);

            # High-capacity packet counters default to 0
            $if_hc_in_ucast_pkts      = $poll->('ifHCInUcastPkts',      0);
            $if_hc_out_ucast_pkts     = $poll->('ifHCOutUcastPkts',     0);
            $if_hc_in_multicast_pkts  = $poll->('ifHCInMulticastPkts',  0);
            $if_hc_out_multicast_pkts = $poll->('ifHCOutMulticastPkts', 0);
            $if_hc_in_broadcast_pkts  = $poll->('ifHCInBroadcastPkts',  0);
            $if_hc_out_broadcast_pkts = $poll->('ifHCOutBroadcastPkts', 0);

            # Build data structures (each hash is keyed by target, then
            # by ifIndex)
            $if_alias{$target}->{$index}                 = $if_alias;
            $if_name{$target}->{$index}                  = $if_name;
            $if_admin_status{$target}->{$index}          = $if_admin_status;
            $if_oper_status{$target}->{$index}           = $if_oper_status;
            $if_in_discards{$target}->{$index}           = $if_in_discards;
            $if_out_discards{$target}->{$index}          = $if_out_discards;
            $if_in_errors{$target}->{$index}             = $if_in_errors;
            $if_out_errors{$target}->{$index}            = $if_out_errors;
            $if_hc_in_ucast_pkts{$target}->{$index}      = $if_hc_in_ucast_pkts;
            $if_hc_out_ucast_pkts{$target}->{$index}     = $if_hc_out_ucast_pkts;
            $if_hc_in_multicast_pkts{$target}->{$index}  = $if_hc_in_multicast_pkts;
            $if_hc_out_multicast_pkts{$target}->{$index} = $if_hc_out_multicast_pkts;
            $if_hc_in_broadcast_pkts{$target}->{$index}  = $if_hc_in_broadcast_pkts;
            $if_hc_out_broadcast_pkts{$target}->{$index} = $if_hc_out_broadcast_pkts;

            # Entertain operator
            print $DASH if $mode eq 'interactive';

        }   # End 'Acquire named interface attributes'

        #
Entertain operator print $BANG if $mode eq 'interactive'; } # End 'Loop through targets' # Make things look pretty say "\n" if $mode eq 'interactive'; # Debug trace trace_location('end') if $debug; return 1; } ######################################################################## # Tell the operator what I discovered ######################################################################## sub print_report { my $handle; my $total = @target; my $now = get_now(); # If we are running in test mode, skip this routine unless ($dome) { print_it("Running in test mode, cannot print a meaningful report\n"); return 1; } # Debug trace trace_location('begin') if $debug; # Direct output to screen or to file if ($mode eq 'interactive') { $handle = *STDOUT; } else { open $handle, '>', $report_file or die "Cannot open $report_file: $!\n"; } print {$handle} <{$index}; $if_alias = $if_alias{$target}->{$index}; $if_name = $if_name{$target}->{$index}; $if_oper_status = $if_oper_status{$target}->{$index}; $if_in_errors_pct = $if_in_errors_pct{$target}->{$index}; $if_out_errors_pct = $if_out_errors_pct{$target}->{$index}; $if_in_discards_pct = $if_in_discards_pct{$target}->{$index}; $if_out_discards_pct = $if_out_discards_pct{$target}->{$index}; if ($first) { printf {$handle} "%-10.10s %-20.20s %-4.4s %06.3f %06.3f %06.3f %06.3f\n", $if_name, $if_alias, $if_oper_status, $if_in_errors_pct, $if_out_errors_pct, $if_in_discards_pct, $if_out_discards_pct; $first = 0; } else { printf {$handle} " %-10.10s %-20.20s %-4.4s %06.3f %06.3f %06.3f %06.3f\n", $if_name, $if_alias, $if_oper_status, $if_in_errors_pct, $if_out_errors_pct, $if_in_discards_pct, $if_out_discards_pct; } } # End 'Walk interfaces' } # Add silent devices to the report for my $silent (sort @silent) { printf {$handle} "%-15s Not answering pings\n", $silent; } # Add unresponsive devices to the report for my $unresponsive (sort @unresponsive) { printf {$handle} "%-15s Not answering SNMP GETs\n", $unresponsive; } # Add insane 
devices to the report for my $insane (sort @insane) { printf {$handle} "%-15s Insane\n", $insane; } # Clean up unless ($handle =~ /STDOUT/) { close $handle or warn "Cannot close $report_file: $!\n"; } # Make things look pretty say "\n" if $mode eq 'interactive'; # Debug trace trace_location('end') if $debug; return 1; } ######################################################################## # Output help ######################################################################## sub HELP_MESSAGE { print <