#!/usr/bin/perl -w
#
# Report which addresses appear most often in the To: and From: headers of
# an mbox file.
#
# Usage: script [-a] [-t <threshold>] [-h] MBOX
#
#   -a               list every address (the threshold is ignored)
#   -t, --threshold  minimum percentage an address must exceed to be listed
#                    (default 10)
#   -h, --help, -?   print usage and exit
#
# A message starts at a line matching /^From\s+/ and its header block ends
# at the first blank line; To:/From: lines are only counted inside a header
# block.  Only header lines that consist of a single address (optionally
# preceded by a display name and wrapped in angle brackets) are counted.

use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case);

my ($showall, $help);
my $threshold = 10;

GetOptions(
    'a'             => \$showall,
    'threshold|t=f' => \$threshold,
    'help|h|?'      => \$help,
);

usage() if $help;

# Test the argument count rather than the truth of $ARGV[0], so that a
# mailbox file literally named "0" is still accepted.
die "FATAL: Specify MBOX\n" unless @ARGV;
my $spamfile = $ARGV[0];

# Shared by the To: and From: matchers below.
my $ADDRESS = qr{
    (?:[[:alnum:]\s"']+\s+)?            # optional display name
    [\<]?                               # optional opening angle bracket
    (                                   # capture 1: the bare address
        [^[:space:]\@]+ \@              #   local part and at sign
        (?:(?:[[:alnum:]\-])+\.)+       #   one or more dot-terminated labels
        [[:alpha:]]{2,6}                #   alphabetic final label
    )
    [\>]?                               # optional closing angle bracket
}x;
my $TO_LINE   = qr/^To:\s+$ADDRESS$/;
my $FROM_LINE = qr/^From:\s+$ADDRESS$/;

# Counters start at 0 so the report prints real numbers (and no
# "uninitialized" warnings) when a mailbox has no matching header lines.
my $emails     = 0;
my $total_to   = 0;
my $total_from = 0;
my (%to_count, %from_count);
my $in_header = 0;

# Three-argument open on a lexical handle: the file name can never be
# interpreted as an open mode, and a failure is reported instead of ignored.
open my $mbox, '<', $spamfile
    or die "FATAL: Cannot open $spamfile: $!\n";

while (my $line = <$mbox>) {
    if ($line =~ /^From\s+/) {
        $emails++;
        $in_header = 1;
    }
    if ($line =~ /^\s*$/) {
        $in_header = 0;
    }
    next unless $in_header;

    if ($line =~ $TO_LINE) {
        $to_count{$1}++;
        $total_to++;
    }
    if ($line =~ $FROM_LINE) {
        $from_count{$1}++;
        $total_from++;
    }
}
close $mbox;

my $lacking_to   = $emails - $total_to;
my $lacking_from = $emails - $total_from;

print "Number of emails: $emails\n";
print "Threshold: $threshold\%\n";
print "Number of unique recipients: " . scalar(keys %to_count) . "\n";
print "Total number of To: lines: $total_to\n";
print "$lacking_to emails had no To: line\n";
print "Highest receivers (%age To:) (%age Total):\n\n";
print_ranking(\%to_count, $total_to, $emails);

print "\nTotal number of From: lines: $total_from\n";
print "$lacking_from emails had no From: line\n";
print "Highest senders:\n\n";
print_ranking(\%from_count, $total_from, $emails);

# Print one "address: count (% of header lines) (% of emails)" row per
# address, highest count first; equal counts are ordered by address so the
# output is stable between runs.
#
#   $count_of     - hashref mapping address => number of matching lines
#   $total_lines  - total matching header lines of this kind
#   $total_emails - number of messages seen in the mailbox
#
# Unless -a was given, a row is printed only when one of its two rounded
# percentages exceeds the threshold.  Both totals are non-zero whenever the
# hash has entries, because every counted line increments them inside a
# message, so the divisions are safe.
sub print_ranking {
    my ($count_of, $total_lines, $total_emails) = @_;

    my @ranked = sort { $count_of->{$b} <=> $count_of->{$a} || $a cmp $b }
                 keys %{$count_of};

    for my $addr (@ranked) {
        my $count       = $count_of->{$addr};
        my $perc_emails = sprintf '%.2f', ($count / $total_emails) * 100;
        my $perc_lines  = sprintf '%.2f', ($count / $total_lines) * 100;

        next unless $showall
            || $perc_emails > $threshold
            || $perc_lines > $threshold;

        print "$addr: $count ($perc_lines\%) ($perc_emails\%)\n";
    }
    return;
}

# Print the command-line help to standard output and exit with status 0.
sub usage {
    print "Usage: $0 [-a] [-t <threshold>] [-h] MBOX\n";
    print " -a - show all addresses (cancels out -t)\n";
    print " -t - threshold (\%age) of items to show\n";
    print " -h - this help\n";
    exit 0;
}