#!/opt/vdops/bin/perl # This script pings an IP space and records which addresses answer in a text # database. It then produces both detailed and summary reports. # V Who When What # --------------------------------------------------------------------------- # 2.6.6 skendric 03-30-2010 Notify operator of progress # 2.6.5 skendric 01-23-2010 Syntax fiddles # 2.6.4 skendric 01-26-2009 Add administrative defined routes # 2.6.3 skendric 05-08-2008 Add $warmIPs to the summary report # 2.6.2 skendric 06-25-2007 Remove SCCA hack # 2.6.1 skendric 01-04-2007 Hack in SCCA routes # 2.6.0 skendric 11-29-2006 Grab route table to determine routes to ping # 2.5.5 skendric 10-23-2006 Fiddle with formatting # 2.5.4 skendric 11-05-2005 Upgrade to new FHCRC::VDOPS module structure # 2.5.3 skendric 11-30-2004 Remove effort to identify active subnets # 2.5.2 skendric 04-30-2004 Yank & char in front of subroutine calls # 2.5.1 skendric 10-22-2003 Fixed error in identifying active subnets # 2.5.0 skendric 10-05-2003 Add limited support for non /24 subnet masks # 2.4.0 skendric 10-04-2003 Performance enhancements # 2.3.1 skendric 10-03-2003 More accurately detect live subnets # 2.3.0 skendric 03-28-2003 Numerous minor updates # 2.2.2 skendric 01-26-2003 Tighten scoping # 2.2.1 skendric 01-24-2003 Enhance debugging # 2.2.0 skendric 01-23-2003 Handle pings sent to subnet broadcast # addresses, enhance debugging # 2.1.0 skendric 01-15-2003 Enhance summary report # 2.0.0 skendric 01-13-2003 Add summary report # ... 
rhood Numerous updates # 1.0.0 rhood 01-10-1994 First Version # Author: Stuart Kendrick, sbk {put at sign here} skendric {put dot here} com # # Source: http://www.skendric.com/device # # This software is available under the GNU GENERAL PUBLIC LICENSE, see # http://www.fsf.org/licenses/gpl.html # # This script takes the following approach: # -Ping all IP addresses in space # -Record results in data file # -Produce two reports and append to a log file # # # Definitions: # AllIPs: The keys are the union of addresses in the hosts # table and addresses which have answered pings; the # values are the time when the address last answered # a ping # HotIPs: The keys are subnets; the values are the sum of all # addresses which answered pings during this pass # WarmIPs: The keys are subnets; the values are the sum of all # addresses on that subnet which have answered a ping # within the last $idle_days # # # # Requirements: # # # Assumptions: # # # Tested on: # -perl-5.10.1 # -fping-2.4b2 # # # Instructions: # -Customize the script for your site: find the 'user-configurable # variables' section and modify as appropriate # -Try it out # # # Caveats: # # # Known Bugs: # # # To do: # # # Begin script # Load modules use strict; use warnings; use feature 'say'; use feature 'switch'; use Carp qw(carp cluck croak confess); use Data::Dumper; use English qw( -no_match_vars ); use Getopt::Std; use List::MoreUtils qw(any); use Net::Netmask; use NetAddr::IP; use Sys::Syslog; use FHCRC::Netops::NetopsTools 2.0.7; use FHCRC::Netops::NetopsData 1.3.0; use FHCRC::Netops::PingTools 1.1.5; use FHCRC::Netops::SNMPTools 1.3.9; use FHCRC::Netops::Utilities 1.3.9; # Declare global variables my %allIPs; # Union of hosts table and unregistered # addresses which have answered a ping within # the last $idle_days my $allIPs; # Total number of elements in %allIPs my $data_file; # Place to store the time when the node # last answered a ping my %hotIPs; # Addresses which answered a ping during # this 
# pass, tallied by subnet
my $hotIPs;            # Sum of above
my $idle_days;         # Number of days after which a non-responsive
                       # node is considered cold (inactive)
my $institution;       # Part of report title
my %invisible_routes;  # Hash of masks keyed by route, listing the routes which
                       # aren't visible in the routing table which we consult
my %ip_comment;        # Comment field from hosts table
my %ip_name;           # First hostname of each address in hosts table
my $log_file;          # Place to record summary statistics
my %routes;            # Hash of subnet masks keyed by IP route
my $report_file;       # Place to write report
my $router;            # Router which I will query for a route table
my @sortedIPs;         # List of IPs from %allIPs sorted by IP addresses
my $summary_file;      # Place to write the summary report
my $time_limit;        # An IP address must have answered a ping
                       # since $time_limit to be considered warm
my %warmIPs;           # Addresses which have answered at least
                       # one ping across the last $idle_days, tallied
                       # by subnet
my $warmIPs;           # Sum of above


# Define user-configurable variables

# NOTE(review): $debug, $program_name, $usage, $version, $grab_hosts, the
# $fping_* settings, @snmp_read_list, @snmp_version_list, @allowed_networks,
# %option and $mode are assigned below without a 'my' declaration in this
# file.  Under 'use strict' they are presumably exported by the
# FHCRC::Netops modules -- confirm.

# Define global variables
$debug = 0;            # 10 = Logging
                       #  9 = Database SELECT operations
                       #  8 = Per IP/MAC/Port processing
                       #  7 = Database INSERT/UPDATE/DELETE
                       #  6 = Dump SNMP var
                       #  5 = Dump snmp_packets
                       #  4 = Grody: print big var
                       #  3 = Verbose: print mid var
                       #  2 = Simple: print small var
                       #  1 = Basic: subroutine trace
                       #  0 = Disable debugging
$program_name = 'pinger-report';
$usage = 'Usage: pinger-report -s {yes|no} [-d {integer}]';
$version = '2.6.6';

# Binaries
# The first word is the executable, the second the hosts file: sanity_check
# splits this string to verify both
$grab_hosts = '/bin/cat /etc/hosts';

# Misc
$idle_days = 30;

# Ping parameters
$fping_backoff = '1.0';  # How rapidly fping backs off, when a target has
                         # missed a ping. '1.0' means linear
$fping_max = 510;        # Maximum number of addresses we will hand to fping
                         # at a time. In my experience, most boxes can
                         # emit an extraordinary number of pings per second.
                         # However, most boxes start dropping ICMP Replies
                         # if they receive them too rapidly: using a sniffer,
                         # I can see them arrive at the NIC ... but my
                         # application doesn't receive them. I'm guessing that
                         # IP stacks contain a buffer of some size into which
                         # they stuff ICMP Replies, and if the CPU doesn't empty
                         # that buffer fast enough (handing the ICMP Replies
                         # to the application), then the buffer overflows and
                         # the OS starts losing ICMP Replies. By default,
                         # fping retries any addresses which failed to answer
                         # the first ping (see $fping_retries), so this script
                         # effectively double-checks such misses. However,
                         # if the number of missed pings is sufficiently large
                         # the responses to this second round can overflow
                         # your ICMP Reply buffer again ... leading to
                         # incorrect results, i.e. IP addresses reported as
                         # silent when in fact they are responding to pings.
                         # So why not set this parameter to 1? Well, the
                         # pinging process is slow then. What to do? Calibrate
                         # your box. Use 'fping' to send pings to an increasing
                         # number of live targets ... until you see the number
                         # of responses hit a ceiling
$fping_retries = 1;      # Number of times fping will retry an IP address, if it
                         # fails to answer the first time. fping 2.4b2 doesn't
                         # support 0 here
$fping_timeout = 50;     # Number of milliseconds before fping quits listening
                         # for a response on its first effort. The man page
                         # says that a non-root user cannot set this to be less
                         # than 250 ms, but I find that I can set it as low
                         # as 50 ms before fping quits running

# Report stuff
$institution = 'Widgets International';
$data_file = '/home/netops/rpts/pinger-report.dat';
$log_file = '/home/netops/logs/pinger-report.log';
$report_file = '/home/netops/rpts/pinger-report.txt';
$summary_file = '/home/netops/rpts/pinger-summary.txt';

# SNMP Stuff
@snmp_read_list = qw/public/;
@snmp_version_list = qw/2/;

# Route Stuff
# This is the router whose route table we will use to identify the
# subnets we want to ping
$router = 'ja-b-rtr.fhcrc.org';

# We will filter the route table using this list of major networks,
# pinging only addresses on routes which fall within one of these
# This is useful in a number of scenarios, here are a few:
# (a) Some of the networks in your route table aren't reachable
#     from the box running this script
# (b) Your router carries a full Internet route table, and you
#     only want to ping the routes inside your enterprise
# (c) You want to debug, and wading through all the output is tedious;
#     in that case, restrict this list to something small, like a
#     /24 network
# If you don't want to be bothered with this feature, just enter
# the string 'all' here. Or specify '-a' on the command-line
@allowed_networks = qw (all);

# Sometimes, the reference router doesn't contain all the routes to which
# you want to send pings. Populate this hash with those extra routes
%invisible_routes = ( '10.112.0.0' => '255.255.252.0' );

# Calculate how old an address must be before we consider it cold
# (epoch seconds: now minus $idle_days expressed in seconds)
$time_limit = time - $idle_days*24*60*60;

# Grab arguments
getopts('ad:rs:', \%option);
@allowed_networks = qw (all) if $option{a};
die "Must define \@allowed_networks\n" unless @allowed_networks > 0;

# Set mode: 'interactive' when STDIN is a terminal, 'batch' otherwise.
# The subroutines only print progress messages in interactive mode
if (-t STDIN) { $mode = 'interactive' } else { $mode = 'batch' }


##### Begin Main Program ###############################################

{
  check_args();      # Check arguments
  sanity_check();    # Look for basic problems
  grab_routes();     # Populate %routes
  ping_routes();     # Ping the list
  read_data();       # Read the data file
  read_hosts();      # Acquire the hosts file
  do_the_work();     # Figure out current state
  write_data();      # Update the data file
  write_log();       # Update the log file
  write_report();    # Write the report file
  write_summary();   # Write the summary file
}

##### End Main Program #################################################


########################################################################
# Suck in the hosts file.
# After this subroutine exits, %allIPs
# consists of the union of hotIPs, the datafile, and the hosts table
#
# Each usable line of the output of $grab_hosts is read as:
#   field 1        the address
#   field 2        one or more names separated by $COMMA; the first one is
#                  stored in %ip_name
#   fields 3..n    joined with $SPACE and stored in %ip_comment (with the
#                  first '# ' marker removed)
# Addresses not already present in %allIPs are entered with a last-answer
# time of 0.
#
# Lines skipped: comments (indented or not), lines without any word
# character, lines without a name field, and lines whose address is not a
# dotted-quad IPv4 address.  The rest of this script derives a /24 from
# every key of %allIPs and sorts the keys as IPv4 addresses, so entries
# such as '::1' cannot be processed downstream.
#
# Returns 1.
########################################################################

sub read_hosts {

  # Debug trace
  trace_location('begin') if $debug;

  # Notify operator
  say "Grabbing hosts via $grab_hosts" if $mode eq 'interactive';

  HOST:
  for my $host (split ('\n', `$grab_hosts`)) {

    # Skip comment lines, including indented ones
    next HOST if $host =~ /\A\s*#/;

    # Skip empty and whitespace-only lines
    next HOST if $host !~ /\w/;

    # split ' ' discards leading whitespace, so an indented entry does not
    # produce an empty address field
    my ($ip, $aliases, @comment) = split (' ', $host);

    # An address without a name is of no use to the reports
    next HOST unless defined $aliases;

    # This script is IPv4-only
    next HOST unless $ip =~ /\A\d{1,3}(?:\.\d{1,3}){3}\z/;

    my @aliases = split ($COMMA, $aliases);
    $ip_name{$ip}    = $aliases[0];
    $ip_comment{$ip} = join ($SPACE, @comment);
    $ip_comment{$ip} =~ s/#\s+//;

    # Keep a time already recorded by ping_routes or read_data
    $allIPs{$ip} = 0 unless defined $allIPs{$ip};
  }

  # Debug trace
  trace_location('end') if $debug;

  return 1;
}



########################################################################
# Grab the route table, merge with the administratively defined routes,
# populating %routes
#
# Entries in %invisible_routes override a dynamic route with the same key,
# because they come last in the merge.
#
# Dies when $router fails the snmp_char() check or when no route table
# comes back.  Returns 1.
########################################################################

sub grab_routes {

  # Debug trace
  trace_location('begin') if $debug;

  # Notify operator
  say "Grabbing routes from $router" if $mode eq 'interactive';

  # Grab dynamic routes
  die "Could not acquire route table from $router" unless snmp_char($router);
  my $dynamic_ref = acquire_route_table($router);

  # Without this check an undefined result would die on the dereference
  # below with a message that does not name the router.
  # NOTE(review): assumes acquire_route_table returns a hash reference of
  # masks keyed by route on success -- confirm against the module
  die "Could not acquire route table from $router"
    unless defined $dynamic_ref;

  # Merge with administratively defined routes
  %routes = (%$dynamic_ref, %invisible_routes);

  # Debug info
  if ($debug) {
    say 'I will examine the following routes:';
    say "$_ / $routes{$_}" for sort keys %routes;
  }

  # Debug trace
  trace_location('end') if $debug;

  return 1;
}



########################################################################
# Ping everything & keep track of who responds.
# After this subroutine exits, %allIPs contains time keyed by hotIP
#
# For every route in %routes (except 0.0.0.0 and 127.0.0.0, and except
# routes outside @allowed_networks when that list is not 'all') the host
# addresses of the route are handed to ping_list().  Every address
# reported alive is stamped in %allIPs with the time of the ping and
# tallied in %hotIPs (by /24) and $hotIPs (total).
#
# Dies when an entry of @allowed_networks cannot be parsed.  Routes which
# cannot be parsed are skipped with a warning.  Returns 1.
########################################################################

sub ping_routes {
  my @allowed_network_objects;  # NetAddr::IP objects paralleling
                                # the strings in @allowed_networks

  # Debug trace
  trace_location('begin') if $debug;

  # Notify operator
  say "Pinging routes..." if $mode eq 'interactive';

  # Make sure the grand total is a number even if nothing answers
  $hotIPs //= 0;

  # Create a sorted list of routes (this just makes the pinging order
  # pretty ... not functional)
  my @sorted_routes = Net::Netmask::sort_by_ip_address(keys %routes);

  # Build array of NetAddr::IP objects representing @allowed_networks
  my $filtering = $allowed_networks[0] ne 'all';
  if ($filtering) {
    for my $network (@allowed_networks) {
      my $network_obj = NetAddr::IP->new($network);
      die "Cannot parse allowed network '$network'\n"
        unless defined $network_obj;
      push @allowed_network_objects, $network_obj;
    }
  }

  # Loop through routes
  ROUTE:
  for my $route (@sorted_routes) {
    my $subnet;                   # /24 of the last address which answered
    my $mask = $routes{$route};

    # Debug info
    say "Considering $route/$mask" if $debug > 2;

    # Ignore the gateway-of-last-resort and localhost
    next ROUTE if $route eq '0.0.0.0';
    next ROUTE if $route eq '127.0.0.0';

    # A route the library cannot parse would otherwise kill the whole run
    # on the method calls below
    my $route_obj = NetAddr::IP->new($route, $mask);
    unless (defined $route_obj) {
      warn "Skipping unparseable route $route/$mask\n";
      next ROUTE;
    }

    # Ignore unless the route matches one of the major networks listed in
    # @allowed_networks
    if ($filtering) {
      next ROUTE
        unless grep { $_->contains($route_obj) } @allowed_network_objects;
    }

    # Debug info
    say "Accepting $route/$mask" if $debug > 2;

    # Generate list of addresses within this route
    my @addrs = map { $_->addr } @{ $route_obj->hostenumref() };

    # Ping the addresses.  ping_list returns further lists, which this
    # routine does not use
    my $time = time;
    say " $route/$mask" if $mode eq 'interactive';
    my ($alive_ref) = ping_list(\@addrs);

    # Record the results
    for my $ip (@{ $alive_ref || [] }) {

      # Grab the first three octets and call it 'subnet'
      ($subnet) = ($ip =~ /(\d+\.\d+\.\d+)\.\d+/);
      $subnet .= '.0';

      # Record stats
      $allIPs{$ip} = $time;
      $hotIPs{$subnet}++;
      $hotIPs++;
      say "hotIPs{$subnet} = $hotIPs{$subnet}, $ip is hot" if $debug == 8;
    }

    # Debug info.  $subnet stays undefined when nobody on this route
    # answered, so do not interpolate it in that case
    if ($debug) {
      if (defined $subnet) {
        say "hotIPs{$subnet} = $hotIPs{$subnet}, and hotIPs = $hotIPs";
      }
      else {
        say "No answers from $route/$mask, and hotIPs = $hotIPs";
      }
    }
  }

  # Debug info
  say '%allIPs contains ' . scalar(keys %allIPs) . ' addresses' if $debug > 1;

  # Debug trace
  trace_location('end') if $debug;

  return 1;
}



########################################################################
# - Calculate $allIPs
# - Populate %warmIPs
# - Purge nodes which don't exist anymore: these are nodes which aren't
#   in the hosts table and which aren't answering pings
#
# After this subroutine exits, allIPs consists of the union of hotIPs,
# the hosts table, and addresses from the datafile which have answered
# pings within the past $idle_days
#
# Returns 1.
########################################################################

sub do_the_work {

  # Debug trace
  trace_location('begin') if $debug;

  # Notify operator
  say 'Processing' if $mode eq 'interactive';

  # The totals must be numbers even when no address qualifies
  $allIPs  //= 0;
  $warmIPs //= 0;

  # Walk through %allIPs, incrementing $allIPs if the address exists in
  # the hosts table or if the address doesn't exist in the hosts table but
  # has answered a ping recently. Tally warmIPs by /24 subnet. (This
  # choice of /24 is arbitrary -- has nothing to do with the routing
  # table, for example.) Remove addresses which don't exist in the hosts
  # table and which haven't answered a ping within the last $idle_days.
  # This last step is aimed at removing addresses in subnets which don't
  # exist anymore, neither in the hosts table nor in the routing table.

  # Walk the IP addresses.  'keys' hands the loop a copy of the key list,
  # so deleting entries inside the loop is safe
  IP:
  for my $ip (keys %allIPs) {

    # Grab the first three octets and call it 'subnet'
    my ($subnet) = ($ip =~ /(\d+\.\d+\.\d+)\.\d+/);
    $subnet .= '.0';

    my $in_hosts = $ip_name{$ip};
    my $is_warm  = $allIPs{$ip} > $time_limit;

    # Address doesn't exist in the hosts table and hasn't answered
    # a ping recently (where recently is defined as less than $idle_days):
    # delete from %allIPs
    unless ($in_hosts or $is_warm) {
      delete $allIPs{$ip};
      next IP;
    }

    # Address exists in the hosts table, or has answered a ping recently:
    # increment $allIPs
    $allIPs++;

    # Increment $warmIPs and %warmIPs. Define $hotIPs{$subnet} if it
    # doesn't already exist -- this avoids undefined errors in write_summary
    if ($is_warm) {
      $warmIPs++;
      $warmIPs{$subnet}++;
      $hotIPs{$subnet} //= 0;
    }
  }

  # Debug trace
  trace_location('end') if $debug;

  return 1;
}



########################################################################
# Read the datafile. After this subroutine exits, %allIPs consists of
# the union of hotIPs and whatever was stored in the datafile
#
# Each line of the data file is 'address:time' (the format written by
# write_data).  A time already present in %allIPs wins over the stored
# one.  Lines without an address are ignored; a missing time is read as 0
# (never answered).
#
# Dies when the data file cannot be opened.  Returns 1.
########################################################################

sub read_data {

  # Debug trace
  trace_location('begin') if $debug;

  # Notify operator
  say "Reading $data_file\n" if $mode eq 'interactive';

  # Crunch data file
  touch_file($data_file, 0664) unless -e $data_file;
  open my $data, '<', $data_file or die "Cannot open $data_file: $!";
  LINE:
  while (my $line = <$data>) {
    chomp $line;
    my ($ip, $last_ping) = split (':', $line);

    # A blank or damaged line must not create an empty key in %allIPs
    next LINE unless defined $ip and length $ip;

    # Keep the value numeric for the comparisons in do_the_work
    $last_ping = 0 unless defined $last_ping and length $last_ping;

    $allIPs{$ip} = $last_ping unless defined $allIPs{$ip};
  }
  close $data or warn "Cannot close $data_file: $!";

  # Debug info
  if ($debug > 2) {
    print "\%allIPs contains " . scalar(keys %allIPs) . " addresses\n";
  }

  # Debug trace
  trace_location('end') if $debug;

  return 1;
}



########################################################################
# Check for obvious errors
#
# Dies with an explanatory message on the first problem found; returns 1
# when every check passes.
########################################################################

sub sanity_check {

  # Debug trace
  trace_location('begin') if $debug;

  # Notify operator
  log_it("Starting $PROGRAM_NAME");
  say 'Sanity check...' if $mode eq 'interactive';

  # The binary portion of grab_hosts must be executable
  my ($binary, $hosts) = split ($SPACE, $grab_hosts);
  die "Cannot execute $binary: $!" unless -x $binary;

  # The hosts file must be readable and non-empty
  die "$hosts must be readable\n"  unless -r $hosts;
  die "$hosts must be non-empty\n" unless -s $hosts;

  # fping behaves differently when run as root, and I haven't written
  # enough code to handle that behavior
  die "Cannot run as root\n" if $EUID == 0;

  # fping must be executable
  die "Cannot execute $fping_binary: $!" unless -x $fping_binary;

  # fping must be setuid root.  The backticks yield undef when the command
  # cannot be run at all, so test for that before matching
  my $result = `$fping_binary 127.0.0.1`;
  die "$fping_binary must be setuid root: $!"
    unless defined $result and $result =~ /is alive/;

  # fping v2.4b2 doesn't support a retry count of 0
  die "fping v2.4b2 doesn't support a retry of 0\n"
    unless $fping_retries > 0;

  # The various report and log files must be readable and writeable
  for my $file ($data_file, $report_file, $summary_file, $log_file) {
    touch_file($file, 0664) or die "Cannot read/write $file: $!";
  }

  # Debug trace
  trace_location('end') if $debug;

  return 1;
}



########################################################################
# Build a timestamp
#
# Takes a time in epoch seconds and returns it in local time as
# 'MM-DD-YY HH:MM', or '< never >' when the time is 0.
########################################################################

sub time_stamp {
  my ($time) = @_;
  my $stamp;

  # Debug trace
  trace_location('begin') if $debug == 9;

  if ($time == 0) {
    $stamp = '< never >';
  }
  else {
    my (undef, $min, $hour, $mday, $mon, $year) = localtime ($time);

    # localtime counts years from 1900; the modulus keeps the last two
    # digits in every century, where subtracting 100 only worked for
    # 2000-2099
    $stamp = sprintf ("%2.2d-%2.2d-%2.2d %2.2d:%2.2d",
                      $mon+1, $mday, $year % 100, $hour, $min);
  }

  # Debug trace
  trace_location('end') if $debug == 9;

  return $stamp;
}



########################################################################
# Write data_file
#
# Rewrites the data file from %allIPs, one 'address:time' line per
# address, sorted by address.  The sorted list is kept in @sortedIPs.
#
# Dies when the data file cannot be opened.  Returns 1.
########################################################################

sub write_data {

  # Debug trace
  trace_location('begin') if $debug;

  # Notify operator
  say "Writing $data_file" if $mode eq 'interactive';

  # Open data file
  open my $data, ">", $data_file or die "Cannot open $data_file: $!";

  # Build a sorted list of addresses
  @sortedIPs = Net::Netmask::sort_by_ip_address(keys %allIPs);

  # Walk the sorted list and write each address to the data file
  print {$data} "$_:$allIPs{$_}\n" for @sortedIPs;

  close $data or warn "Cannot close $data_file: $!";

  # Debug trace
  trace_location('end') if $debug;

  return 1;
}



########################################################################
# Write log_file. Record $hotIPs and $allIPs.
#
# Appends one line to the log file: a timestamp for now, the number of
# addresses which answered during this pass, and the number of known
# addresses, separated by $SPACE.
#
# Logging is best effort: when the log file cannot be opened or closed a
# warning is issued and the run continues.  Returns 1.
########################################################################

sub write_log {

  # Debug trace
  trace_location('begin') if $debug;

  # Notify operator
  say "Writing $log_file" if $mode eq 'interactive';

  # Write log.  The warning must be double-quoted so that the file name and
  # the system error are interpolated, and a handle which failed to open
  # must not be printed to or closed
  if (open my $log, '>>', $log_file) {

    # The counters are undefined when nothing was tallied; log them as 0
    print {$log} join ($SPACE, epoch_to_tstamp(time),
                       $hotIPs // 0, $allIPs // 0), "\n";
    close $log or warn "Cannot close $log_file: $!";
  }
  else {
    warn "Cannot open $log_file: $!";
  }

  # Debug trace
  trace_location('end') if $debug;

  return 1;
}



######################################################################## # Write report_file. This lists each IP address in the hosts table and # when it last returned a ping. This also lists any IP addresses which # answered a ping during this pass but which don't exist in the hosts # table. ######################################################################## sub write_report { my $comment; my $name; my $now; # Debug trace trace_location('begin') if $debug; # Notify operator say "Writing $report_file" if $mode eq 'interactive'; # Get current time $now = get_now(); # Define report open my $report, '>', $report_file or die "Cannot open $report_file: $!"; print {$report} <', $summary_file or die "Cannot open $summary_file: $!"; print {$handle} <