#! /usr/bin/perl

    #	Test for lines in a mail folder which begin with
    #	"From " and are not preceded by a blank line.  These
    #	should never be the start of a genuine mail message.
       
    #	by John Walker  http://www.fourmilab.ch/
    #	    	     January 2003
    
    
    #   Counters reported at the end of the run.  These (and
    #   $l, @message and $msize below) are deliberately package
    #   globals: the helper subroutines at the end of the file
    #   read and update them directly.

    $totalMessages = 0;
    $fromNonblank = 0;

    #   The mail folder to scan is the sole command line
    #   argument.  Diagnose its absence instead of failing
    #   later with a confusing "cannot open" message.

    $inf = $ARGV[0];
    if (!defined($inf) || ($inf eq '')) {
        die "Usage: $0 mail_folder[.gz]\n";
    }

    #   Open the folder.  A name ending in ".gz" is read through
    #   zcat; the list form of the pipe open hands the file name
    #   to zcat as a single argument without involving a shell,
    #   so names containing spaces or shell metacharacters are
    #   safe.  Plain files use the three-argument open so the
    #   name can never be taken for a mode or a command.  The
    #   name "-" still means standard input, as it did with the
    #   two-argument open.

    my $in;
    if ($inf =~ m/\.gz$/) {
        open($in, '-|', 'zcat', $inf) || die "Cannot open gzipped input file $inf: $!";
    } elsif ($inf eq '-') {
        open($in, '<-') || die "Cannot open standard input: $!";
    } else {
        open($in, '<', $inf) || die "Cannot open input file $inf: $!";
    }

    #   Skip anything which precedes the first "From " line;
    #   it does not belong to any message.

    $lineno = 0;
    while (defined($l = <$in>)) {
        $lineno++;
        if ($l =~ m/^From /) {
            last;
        }
    }

    #   $l is undefined only if end of file was reached without
    #   ever seeing a "From " line.

    if (!defined($l)) {
        print("No messages in mail folder!\n");
        close($in);
        exit(0);
    }

    $eof = 0;

    $lastline = '';

    while (!$eof) {

        #   Read next message from mail folder.  At
        #   this point $l contains the first ("From ")
        #   line of the message.  Note that $msize counts
        #   this first line before its end of line sequence
        #   is trimmed, and all subsequent lines after.

        $nlines = 0;
        @message = ();
        $msize = length($l);
        trim_end_of_line();
        $message[$nlines++] = $l;
        $lastline = $l;

        #   Read the balance of the message into the
        #   @message array.  Quit when the "From " line
        #   of the next message is encountered or the
        #   end of the folder is encountered.

        while (defined($l = <$in>)) {
            $lineno++;
            trim_end_of_line();
#print("$lineno ($l)\n");
            if ($l =~ m/^From /) {

                #   A genuine message separator is always
                #   preceded by a blank line; report any
                #   "From " line which is not.

                if ($lastline ne '') {
                    $fromNonblank++;
                    print("\n$lineno:\n    ($lastline)\n    ($l)\n");
                }
                $lastline = $l;
                last;
            }
            $message[$nlines++] = $l;
            $msize += length($l);
            $lastline = $l;
        }

        #   @message always holds at least the "From " line
        #   here, so there is always a message to dispose of.

        dispose_of_message();

        #   The inner loop leaves $l undefined only when it ran
        #   off the end of the folder.

        $eof = !defined($l);
# if ($bail++ > 20) { $eof = 1; }  # Quick bail-out for testing
    }

    #   Closing a pipe waits for zcat and reports its exit
    #   status, so a truncated or corrupt archive is noticed
    #   rather than silently yielding partial statistics.

    close($in) || warn("Problem closing input $inf (exit status $?): $!\n");

    #   Compute and display aggregate statistics

    print("Total messages: $totalMessages\n");
    print("From without preceding blank line: $fromNonblank\n");

#   Called once for every message read from the folder,
#   after its lines have been collected in the @message
#   array.  At present the message is merely counted in
#   the global $totalMessages.  The value returned is the
#   count before this message was added.

sub dispose_of_message {
    return $totalMessages++;
}

#   Strip the end of line sequence, together with any
#   other trailing white space, from the current line
#   held in the global $l.  Takes no arguments; $l is
#   modified in place.  The value returned is the result
#   of the substitution.

sub trim_end_of_line {
    my $substitutions = ($l =~ s/[\s\r\n]*$//);
    return $substitutions;
}
