#!/local/bin/perl # narf. News Article Rejection Filter. # Visit narf's web page at # http://utcc.utoronto.ca/abuse/antispam-stuff/narf/ $::NARFVERSION = "0.9301"; # global so filter can use it # # This program attempts to emulate INN enough for the INN antispam # filters to cut in and work well. Like INN, it runs forever. # However, it's not INN. It works by filtering CNews batches from # one directory (the source) to another (the destination), exploding # them into articles in transit and passing each article through # the INN antispam filter (of your choice; it uses filter_innd.pl # intact, so drop in your favorite one). Only accepted articles are # written out to the new batch files. # We use: http://www.exit109.com/~jeremy/news/antispam.html # Some portions of this code do depend on the filter being used, # especially the saveemphist and loademphist routines. # # Usage is: # -v - verbose # -V NUM - give an explicit verbosity level # -D - dryrun, don't actually remove files or write # destination batches. # -O - run only once and then exit, don't sleep # -c NUM - do only NUM batches at most in any pass # (otherwise a pass processes all batches) # -m MAXSIZE - maximum size of articles accepted (including headers) # -b BYTES - write destination batches of at most BYTES bytes # -s/-e/d DIR - set source, destination, or error batch directory # -q DIR - write certain sorts of rejected articles to DIR # -l FILE - set logfile # -w SECS - wait SECS seconds between passes. # -P PIDFILE - file to write our PID in # -C CONFFILE - configuration file loaded periodically to allow # for adjustments. # -S STOPFILE - if this file exists, stop processing and wait until # it stops existing before resuming. # -E EMPFILE - file to load and save EMP history from. 
# -X		- flip eXtra reporting about rejected messages on
#		  (or off)
# -N		- flip logging newsgroups of rejected messages on
#		  (or off)

# usage(): print a one-line usage summary and exit non-zero.
# Any arguments are ignored.
sub usage {
	print "usage: $0 [-vDOXN] [-c NUM] [-m MAXSIZE] [-b BYTES] [-s SRCDIR] [-e ERRDIR] [-d DESTDIR] [-l LOGFILE] [-w SECS] [-P PIDFILE] [-C CONFFILE] [-S STOPFILE] [-E EMPFILE] [-q DUMPDIR]\n";
	exit 1;
}
# XXX: update the getopts string down at the bottom when one updates this.

# We require a recent perl. This probably still runs under perl 5.002,
# but I'm reluctant to assert that without testing it.
use 5.004;			# tested only under perl 5.004.

# The name narf was contributed by Paul Kern. Chris Siebenmann's original
# name for it was much less interesting.
#
# Bugs: this needs more theory of operation information.
# The documentation should be in POD format instead of in comments.
#
# Digression on output batch sizes:
#  The algorithm for clipping the output batch size is naive.
#  Basically, at the end of every input batch, if the output batch
#  is equal to or larger than the batch maximum size a new output
#  batch is started.
#  A new output batch is always started for each directory run,
#  even if the last one only wrote a small batch; we don't hold a
#  batch open while we sleep.
#
# Signals:
#  Sending a SIGHUP asks the program to close and reopen files (the
#  log file and the output batch file) so you can change them. It's
#  not all that useful, because the program runs so fast that it will
#  likely do it on its own soon. SIGHUP also asks narf to save its
#  EMP history if possible.
#
#  Sending a SIGTERM asks it to exit after the next batch has finished.
#
# Speed: disgustingly fast. About 30 megs of batches a minute on an Indy.
# Memory usage: potentially large, but then it's rejecting spam for you.
# It does have to read each article into memory and potentially copy it
# around some; if this bothers you, it can be set to discard articles
# that are too big without looking at them at all.
# (Such articles won't
# have their message-id logged, since it would have to look at the
# article to find that.)
#
# Log format:
# If it starts with a -, it's a rejected article. Format:
#	- MSGID RNEWS-SRC SIZE REASON
#   MSGID may be '-' if there was none.
# If it starts with a +, it's an accepted article. Format:
#	+ MSGID RNEWS-SRC SIZE
# If it starts with a '!', it's an error. Freeform.
# If it starts with a ':', it's an information message. Freeform.
# If it starts with a '_', it's extra information about a rejected article.
# If it starts with a '=', it's extra information about an article. This is
#  mostly freeform, but it always starts with '= WHAT-SORT' and the
#  usual WHAT-SORT will have a constant format.

# Use of 'use strict' requires us to pass filehandles via '*FILEHANDLE{IO}',
# which I'm not sure I understand at all. But sages tell me that this is
# the way to go (both 'use strict' and filehandles).
use strict;

# ---------------------------------------------------------------------
# Configuration section.

# The following are the standard CNews configuration variables; we
# default to deriving the location of everything else from them.
# Generate these from your $NEWSCONFIG file.
my $NEWSCTL = "/news/lib";
my $NEWSBIN = "/news/bin";
my $NEWSARTS = "/news/spool";
my $NEWSOV = "/news/spool";
my $NEWSPATH = "/local/bin:/bin:/usr/bin:/usr/ucb";
my $NEWSUMASK = 022;
my $NEWSCONFIG = "/news/lib/bin/config";

# File and directory locations; defaults are derived from the CNews
# variables above, and most can be overridden by command-line options.
my ($FILTDIR, $SRCDIR, $DSTDIR, $BADDIR, $LOGFILE, $PIDFILE, $STOPFILE,
    $CONFFILE);

# Where to find the filter_innd.pl file.
# NOTEZ BIEN: this file will be RELOADED when CHANGED. It is possible
# to bring the entire daemon down by changing your filter_innd.pl to
# do something stupid (like 'exit').
$FILTDIR = "$NEWSCTL/bin/narf";
# Where the incoming batches live
$SRCDIR = "$NEWSARTS/in.nntp";
# Where destination batches go
$DSTDIR = "$NEWSARTS/in.coming";
# Where bad batches go.  By default this is the destination, where
# the real software may be able to demangle them. Note that the
# bad directory and the source directory have to be on the same
# filesystem, so that one can rename() a batch from $SRCDIR to
# $BADDIR.
$BADDIR = $DSTDIR;
# Where certain sorts of rejected articles are written.
# if this is not set, they are not written at all.
$::DUMPDIR = "$SRCDIR/rejected";
# The log file
$LOGFILE = "$NEWSCTL/narf.log";
# Our PID file
$PIDFILE = "$NEWSCTL/narf.pid";
# Our stop file:
$STOPFILE = "$NEWSCTL/narf.stop";
# A config file to (re)load periodically. Call this a crude IPC
# method to talk to us.
$CONFFILE = "$FILTDIR/narf.conf";
# The file used to save (and reload) the EMP history we've accumulated
# This is a global so that narf.conf can reset it after oopsies like
# a narf.emps file with the wrong ownership. (Don't laugh, it happened
# to us once.)
$::EMPFILE = "$FILTDIR/narf.emps";

# Load the filter to define variables.
# Die if it doesn't define things we'll need later.
my ($MSGHIST);
do "$FILTDIR/filter_innd.pl" or die "Filter load error";
die "Filtering functions not defined"
	unless (defined &filter_mode && defined &filter_art);

# -- The message-id cache --
#
# In INN, the filters are run on a stream of articles that has already
# had duplicates suppressed. In narf, we are running on batchfiles,
# which may come from many sources, all operating independently. For
# correct filter operation we may need to suppress duplicates manually
# within the window of articles that the filter itself keeps history
# for; to do otherwise would risk the filter erroneously deciding that
# something had been posted too many times because it saw several copies
# of the same article.
#
# *However*, this may not actually be the case. If you use the Paul
# Vixie message-id daemon (recommended) and your system can in general
# keep up, then you will only see duplicates in the batch stream in
# very rare cases (we see in the single digits a day).  If this is so
# then you can turn the message-id history cache off entirely and save
# memory. We do not recommend this unless you are *entirely sure* you
# know what you are doing. Ideally you will have real measurements of
# how often you see dups and will understand this.
my $DoMSGHist = 1;		# OFF ONLY IF YOU ARE SURE

# Narf's message id cache must be sized to (usually) be larger than
# the number of articles the filter keeps history for. This attempts
# to get it right for our filter or Jeremy Nixon's cleanfeed. Alter
# it for any other filter.
$::ArticleHistSize = 10000 if (!defined $::ArticleHistSize);
$::BIHistSize = $::ArticleHistSize if (!defined $::BIHistSize);
$MSGHIST = $::BIHistSize + 100;	# extra slop for obscure cases

# Message-IDs are cached in @msgids and %msgids, the latter for speed and
# the former for FIFO.
#
# YYY: The following is a hack, but it can save us thrashing. We know
# that the @msgids array will eventually grow to be $MSGHIST+2 large
# at most ($MSGHIST, plus the current article, plus a cancel), so we
# pre-allocate it this large. This will do an extra shift() through
# the core loop for the first $MSGHIST articles, but this is trivial
# in the long term.
my (@msgids, %msgids);
$#msgids = $MSGHIST+2 if ($DoMSGHist);

# global variables:
my ($pleasestop, $hupbing, $savestate, $runonce, $dryrun, $cancreject);
$pleasestop = 0;	# SIGTERM sets this to one, for an orderly shutdown
$hupbing = 0;		# SIGHUP, ditto. Forces buffer flushes.
$savestate = 0;		# asks us to save saveable state.

# options that can or should only be set at program startup.
$runonce = 0;		# run only once
$dryrun = 0;		# do we not do file operations?
			# (creating a real destination batchfile,
			# removing input batch files)
$cancreject = 1;	# reject cancels for spam we've rejected when
			# we can.

# options that might be tweaked while we run.
$::MAXARTSIZE = 0;	# no maximum article size.
$::waitint = 120;	# how long to sleep while waiting for work.
$::batchsize = 1300000;	# how many bytes a finished batch can be before
			# we rotate it. This is large so that we can
			# pass relaynews large batches to amortize the
			# overhead.
$::maxbatches = 0;	# how many batches we process in one pass
$::xtra = 1;		# report extra details about rejected articles
$::logngs = 0;		# log newsgroups in _ lines
$::logsigs = 1;		# log article signatures in _ lines
$::verbose = 3;		# verbosity. The meaning of verbosity:
			# 0 - log only rejections
			# 1 - also log accepted articles
			# 2 - also log the start and end of runs (with times)
			# 3 - also log the start and end of batches (with times)
			# 4 - print some debugging output
			# 5 - print some more debugging output

# Stave off memory fragmentation by only copying so much of an article
# body into the __BODY__ element of %::hdr (important in the face of
# people posting articles that are 600k, 900k, 1M, or even 3M in size).
# This is dangerous if the filter thinks it is looking at the entire
# article in __BODY__.
# If it is safe, the filter can create the global variable $::MaxArtSize,
# which is the number of bytes of article body at and above which the filter
# behaves no differently regardless how many more bytes the body has. Narf
# will automatically pick up this value.
# If your filter does not and you want to trim anyways, set $::MAXARTCOPY
# here. But you're probably better off setting $::MaxArtSize in your filter.
my ($maxartcopy);
$::MAXARTCOPY = 0;		# default; allow filter to
				# determine the right value
# $maxartcopy is the working variable. *Don't* set it here.
# (postload_hook() recomputes it from $::MAXARTCOPY and $::MaxArtSize.)

# This hash maps the lower-case version of a header to the canonically cased
# version thereof that filter_innd.pl may depend on. It is in no particular
# order and I hate it.
my (%propcase);
%propcase = qw/message-id Message-ID control Control sender Sender
	subject Subject lines Lines from From
	nntp-posting-host NNTP-Posting-Host newsgroups Newsgroups
	followup-to Followup-To content-type Content-Type
	organization Organization content-disposition Content-Disposition
	content-base Content-Base references References
	x-cancelled-by X-Cancelled-By x-canceled-by X-Canceled-By
	x-cancelbot X-Cancelbot x-mailer X-Mailer/;
# this is used later.
#$propcase = join "|", keys %propcase;

#
# We maintain a separate cache of cancel messages for spam that we
# have rejected (well, we do if cancel rejection is enabled).
# This cache is independent of the message-id cache and is sized
# in a completely different manner. Some statistical crunching on
# our logfiles has led me to this number as excellent. Crunch yours
# if desired to see for yourself.
#
# Our stats suggest that you can get half of everything with a
# 200-message cache; almost 60% with 500; almost 65% with 1000;
# somewhat under 80% with 2000; 85% with 3500; and about 90%
# with 5000. Getting to 95% requires around 20,000 entries.
# How much of a drop rate you want will depend on your feelings
# about how much you want to offload from relaynews and your control
# spool.
$::CMSGHIST = 5000;		# we're generous.
my (@cmsgids, %cmsgids);
# duplicate the message-id pre-allocation hack (see @msgids above).
$#cmsgids = $::CMSGHIST+1 if $cancreject;

# ---------------------------------------------------------------------
# Catch SIGHUP and SIGTERM and react appropriately.
# Handlers only set flags; the main loop polls them at safe points.
sub catch_shutdown {
	$pleasestop = 1;
}
sub catch_hup {
	$hupbing = 1;
	$savestate = 1;
}
$SIG{TERM} = \&catch_shutdown;
$SIG{HUP} = \&catch_hup;

# ----------------------------------------------------------------------
# Subroutines to deal with a cache of message-IDs.
# Although using 'my ($msg) = @_;' appears slightly faster, it does
# involve an extra copy and thus a bit of memory thrashing. We'll pay
# the extra time to avoid that.
# checkmsgid(MSGID): true if MSGID is in the duplicate-suppression cache.
# Always false when the cache is disabled.
sub checkmsgid {
	return $DoMSGHist && exists $msgids{$_[0]};
}
# pushmsgid(MSGID): remember MSGID in the duplicate-suppression cache.
sub pushmsgid {
	return if !$DoMSGHist;
	# I think using 'undef' to create an undefined value
	# is more efficient than setting it to '1' or the like;
	# I could be wrong.
	push @msgids, $_[0];
	$msgids{$_[0]} = undef;
}
# trimmsgids(): shrink the cache back to its configured size ($MSGHIST),
# discarding the oldest entries (FIFO via @msgids).
sub trimmsgids {
	return if !$DoMSGHist;
	local $^W = 0;		# see below
	while ($#msgids > $MSGHIST) {
		# we may attempt to delete an undefined element here,
		# because the pre-filled @msgids cache has the front
		# full of undefined elements, so if we haven't filled
		# it with real messages yet we pop one of them and
		# attempt to delete it. Fortunately this is harmless.
		delete $msgids{shift(@msgids)};
	}
}

# These variant versions of the above routines simply use a different
# cache (that is always present), the cache for cancel suppression.
sub checkcmsgid {
	return $cancreject && exists $cmsgids{$_[0]};
}
sub pushcmsgid {
	return if !$cancreject;
	push @cmsgids, $_[0];
	$cmsgids{$_[0]} = undef;
}
sub trimcmsgids {
	return if !$cancreject;
	local $^W = 0;
	while ($#cmsgids > $::CMSGHIST) {
		delete $cmsgids{shift(@cmsgids)};
	}
}
# delcmsgid(MSGID): remove MSGID from the cancel cache entirely.
# The grep rebuilds @cmsgids, so this is O(cache size).
sub delcmsgid {
	my $k = $_[0];
	delete $cmsgids{$k};
	@cmsgids = grep (($_ ne $k), @cmsgids);
}

# ----------------------------------------------------------------------
# Process a single batchfile. We are passed three filehandles; the first
# is the input batch, the second is the output batch, and the third
# is the place to report rejections to. It returns a true value if all
# went well and 0 on error (NOTE: the original comment here claimed the
# reverse; dobatch() treats a true return as success, and every error
# path below returns 0). Note that rejecting articles *is* considered
# 'going well'.
# We go to some lengths to not make new strings in common paths.
sub munchbatch {
	my ($len, $orig, $tlen, $art, $nlen);
	my ($res, $msg, $k);
	my ($inbatch, $outbatch, $rejectlog, $bname) = @_;
	my $first = 1;
	while (<$inbatch>) {
		# eat the '#! rnews ...' line, checking and parsing it.
		chomp;
		# Does it parse?
		if (!/^#! rnews \d+ \S+$/) {
			# the following is a GORY HACK.
			if (!$first || /^#! rnews /) {
				print $rejectlog "! unparseable batchfile.\n";
				($::verbose > 4) &&
					print "Bogus start line: $_\n";
				return 0;
			}
			# Since this is the first article, we assume that
			# it is a bare article generated via a POST.
			# get the file size.
			# stat should by all rights work on $inbatch, but
			# it does not appear to be a go. Either way from here
			# we lose, sigh.
			if (!defined($len = (stat($bname))[7])) {
				print $rejectlog "! cannot stat $bname for local posting: $!\n";
				($::verbose > 4) &&
					print "Could not stat $bname for local posting: $!\n";
				return 0;
			}
			# ... and make up a fake #! rnews line, using
			# the length.
			$_ = "#! rnews $len LOCAL";
			# ... and seek back to the front of the article,
			# instead of one line in.
			seek $inbatch, 0, 0;
		}
		$first = 0;	# no longer the first article.
		# Trim front ...
		s/^#! rnews //;
		# and split. Guaranteed to be valid, too.
		($len, $orig) = split(' ', $_);
		# Is this article too big?
		if ($::MAXARTSIZE > 0 && $len > $::MAXARTSIZE) {
			if (!seek($inbatch, $len, 1)) {
				print $rejectlog "! failed to seek forward $len bytes.\n";
				return 0;
			}
			# We cannot log the message ID, because we refuse to
			# read the article.
			print $rejectlog "- - ", $orig, " ", $len, " EARTTOBIG\n";
			next;
		}
		# chow down on the actual article.
		$tlen = read $inbatch, $art, $len;
		if ($tlen != $len) {
			print $rejectlog "! wanted $len bytes, got only $tlen.\n";
			return 0;
		}
		# Blow open the headers.
		&headercrack($art);
		# We care more about having a realistic value than being
		# just defined. A defined and blank message-ID is as useless
		# as an undefined one.
		# XXX: Probably we ought not to die.
		die "Null message-ID!" if ($::hdr{'Message-ID'} eq '');
		$msg = $::hdr{'Message-ID'};	# save message-ID.
		# Reject duplicate message-IDs before calling the filter.
		# we check the cancel cache first because it's far more
		# likely to hit.
		if (&checkcmsgid($msg) || &checkmsgid($msg)) {
			if (&checkcmsgid($msg)) {
				print $rejectlog "- ", $msg, " ", $orig, " ", $len, " unneeded/dup cancel\n";
# XXX: although a clever idea for making the cache stretch longer, this
# turns out to have an unfortunate performance impact.
#				&delcmsgid($msg);	# streeetch longer
			} elsif ($::hdr{'Control'}) {
				print $rejectlog "- ", $msg, " ", $orig, " ", $len, " dup cmsg\n";
			} else {
				print $rejectlog "- ", $msg, " ", $orig, " ", $len, " dup\n";
			}
			next;
		} else {
			# remember that we've seen it.
			# We don't bother pushing the message IDs of control
			# messages, because the filter won't consider them
			# spam in any case (well, it had better not or it will
			# delete things we don't want it to). So we might as
			# well save space. This also corresponds to what the
			# filter really tracks, which is what we care about for
			# dup suppression.
			(!defined($::hdr{'Control'})) && &pushmsgid($msg);
		}
		# Call the filtering stuff; the header cracking has given us
		# the %hdr associative array that filter_art() wants.
		$res = &filter_art();
		if (length($res) > 0) {
			# remove any newlines that the filter may have
			# accidentally stuck in $res. This usually happens
			# because in perl, \s matches newlines too.
			$res =~ tr /\n//d;
			# and print information.
			print $rejectlog "- ",$msg, " ", $orig, " ", $len, " rejected: ", $res, "\n";
			&dumpxtra($rejectlog, $msg, $res) if ($::xtra);
			&dumpxtranemp($rejectlog, $msg, $res)
				if ($::xtra && $res =~ /New EMP/);
			&dumpbadart($rejectlog, $msg, $res, \$art, $orig)
				if (!$dryrun && ($::xtra || $orig eq 'LOCAL')
				    && $::DUMPDIR);
			# The following is a heuristic hack. We reject spam;
			# spam is often cancelled; we have no need of cancels
			# for things we've already rejected.
			# We don't do this for things that the filter has
			# learned, because if the filter is restarted they
			# could sneak through again before they're relearned.
			# Nor do we bother immunizing against control messages
			# (including cancels themselves, of course)
			if ($res !~ /EMP/ && !$::hdr{'Control'} && $cancreject) {
# NOTE(review): the source text is CORRUPTED from this point.  A span of
# the original code — apparently everything from a literal '<' in the
# substitution below up to the '>' of a later '($::verbose > 3)' test —
# was stripped (the same HTML-tag-stripping damage seen elsewhere in this
# file).  The missing span contained the end of munchbatch() and the
# head of sub headercrack() (which is why $sepin, $blen, $hl and $i below
# have no visible declarations).  The surviving tokens are preserved
# verbatim; recover the lost code from a pristine copy of narf 0.9301
# before attempting to run this file.
				$msg =~ s/^ 3) && print "Canonicalizing header $i\n"; } }
	# The INN perl patches expect the body in __BODY__ as a
	# header hash element. Supply it.
	if ($maxartcopy && $maxartcopy < $blen) {
		($::verbose > 3) &&
			print "Only creating $maxartcopy bytes from a true body of $blen bytes.\n";
		$::hdr{'__BODY__'} = substr($_[0], $sepin+2, $maxartcopy);
	} else {
		$::hdr{'__BODY__'} = substr($_[0], $sepin+2);
	}
	# Create a Lines: header if we don't have one.
	# (I think INN may do this automatically? I don't know. But the
	# filter depends on it being there. Hmm, inspection suggests that
	# INN does not add one automatically.)
	if (!$::hdr{'Lines'}) {
		my $lcnt;
		# now it gets complicated.
		# if we have truncated the article body via $maxartcopy,
		# then we cannot just count newlines in __BODY__.
		if ($maxartcopy && $maxartcopy < $blen) {
			$lcnt = ($_[0] =~ tr /\n//) - 2 - $hl;
		} else {
			# simple is OK.
			$lcnt = ($::hdr{'__BODY__'} =~ tr/\n//);
		}
		$::hdr{'Lines'} = $lcnt;
		# happens too often on control messages to natter about.
		($::verbose > 3 && !$::hdr{'Control'}) &&
			print "Created header - Lines: $lcnt\n";
	}
	# We record the size of the article itself because this information
	# is not readily recoverable from just the %::hdr array's contents,
	# even neglecting $maxartcopy. It's also cheap.
	$::hdr{'__SIZE__'} = length($_[0]);
}

# Identify an article as being from an interesting place, based on various
# bits of information. This is a separate routine so that it can be
# dynamically reloaded. As such it can't use external my()'d variables, just
# true globals.
# Return null ("") if no special ID can or should be attributed to the
# article.
# dumpxtraid(REASON): classify a rejected article by origin, from the
# rejection reason and the Path: header. Returns a tag string ('NETZILLA')
# or '' when no special origin applies.
sub dumpxtraid {
	my ($res) = @_;
	if ($res =~ /^Netzilla/ ||
	    $::hdr{'Path'} =~ /newsfeeds\.com|news\.jam\.comm?|news\.cm\.org!bcarh189\.bnr\.ca|bcarh8c\.bnr\.ca!(?:enzo\.grapa\.it|news\.primenet\.com)|!207\.70\.214\.[0-9]{1,3}!|!199\.5\.259\.66!|!metro.goldwyy?n.au!/) {
		return 'NETZILLA';
	}
	# no ID possible.
	return '';
}

# Print extra information about a rejected article.
# format is:
# _ {CONTROL-CMD|-} PATH-TAIL {NNTP-POSTING-HOST|_} SBI {BODYHASH|_} {NEWSGROUPS|NEWSGROUP-COUNT|_} {SPECIAL-ORIGIN}
# new things in new narf revisions are added at the END of this.
# The fields are space-separated; embedded whitespace in values is
# converted to '_' so the line stays machine-parseable.
sub dumpxtra {
	my ($rejectlog, $msg, $res) = @_;
	my ($hash, $spec, $sbi);
	my $pathtail = $::hdr{'Path'};
	my $nntph = $::hdr{'NNTP-Posting-Host'};
	my $cntl = $::hdr{'Control'};
	my $ngs = $::hdr{'Newsgroups'};

	$sbi = &calcsbi();
	# construction begins:
	print $rejectlog "_ ", $msg;
	# control message?
	if ($cntl) {
		$cntl =~ s/\s.*$//;
		print $rejectlog ' ', $cntl;
	} else {
		print $rejectlog ' -';
	}
	# turn whitespace into an _ in $pathtail. otherwise people
	# explode horribly all over the floor. I hate people who
	# have whitespace there...
	$pathtail =~ tr / \t/_/;
	# tail of path, if long enough.
	# includes the special Netzilla exemption to smoke out the people
	# who feed them.
	$pathtail = '...' . $1
		if ($pathtail =~ /(![^!]+![^!]+!(?:207\.70\.214\.\d+|[^!]*\bnewsfeeds\.com)!.*)$/ ||
		    $pathtail =~ /((?:!+[^!]+){7})$/);
	print $rejectlog ' ', $pathtail;
	# NNTP posting host.
	$nntph = ($nntph =~ /^\S+$/) ? $nntph : '_';
	print $rejectlog ' ', $nntph;
	# What's the (S)BI of this article? More than 2 decimal points is
	# a waste of space and besides, that's how many Ed Falk uses.
	if ($sbi && (int($sbi) == $sbi)) {
		print $rejectlog ' ', $sbi;
	} elsif ($sbi) {
		printf $rejectlog ' %-.2f', $sbi;
	}
	# hello dolly. welcome to hash-land.
	$hash = &filter_bodyhash()
		if (defined &filter_bodyhash && !$cntl && $::logsigs);
	if ($hash) {
		# change spaces, if any, since we insist on the _ line
		# being parseable.
		$hash =~ tr / /_/;
		print $rejectlog ' ', $hash;
	} else {
		print $rejectlog ' _';
	}
	# log newsgroups if desired.
	if ($::logngs) {
		$ngs =~ tr/ \t//d;	# shouldn't be any, but just in case...
		print $rejectlog ' ', $ngs;
	} else {
		# since SBI looks at Followup-To:, the count of newsgroups
		# may contain non-redundant information.
		my $ngc = ($ngs =~ tr /,//) + 1;
		print $rejectlog ' ', $ngc;
	}
	# this spot is dedicated to Netzilla with love and affection and
	# the best filters we can devise. No, really.
	# it can be generalized to match on people who attempt to hide their
	# origin. The intent is that logging scripts can pull out a true
	# count for them.
	$spec = &dumpxtraid($res);
	if ($spec) {
		print $rejectlog ' ', $spec;
	} else {
		print $rejectlog ' _';
	}
	# ... and finish off.
	print $rejectlog "\n";
}

# Dump extra speshul information about new EMPs.
# All lines start with '= CLASS'. The only class now is
# 'new-emp'.
sub dumpxtranemp {
	my ($rejectlog, $msg, $res) = @_;
	my ($hash, $l);

	print $rejectlog "= ", $msg, " new-emp";
	$hash = &filter_bodyhash() if (defined &filter_bodyhash);
	if ($hash) {
		$hash =~ tr / /_/;
	} else {
		$hash = '_';
	}
	print $rejectlog ' ', $hash;
	if ($::hdr{'Sender'}) {
		print $rejectlog ' ', $::hdr{'Sender'};
	} else {
		print $rejectlog ' ', $::hdr{'From'};
	}
	print $rejectlog ' | ', $::hdr{'Subject'}, ' | ', $::hdr{'Lines'};
	# and wind up
	print $rejectlog "\n";
}

# Calculate the SBI for a given article.
# BI: square root of newsgroup count (ngc)
# BI2: (ngc + sqrt(ngc))/2
# SBI: as BI2, but using min(ngc, followup-to count)
sub calcsbi {
	my $ngc;
	my $ftc;
	$ngc = ($::hdr{'Newsgroups'} =~ tr /,//) + 1;
	if ($::hdr{'Followup-To'}) {
		$ftc = ($::hdr{'Followup-To'} =~ tr /,//) + 1;
		$ngc = $ftc if $ftc < $ngc;
	}
	return ($ngc + sqrt($ngc))/2;
}

# Dump a rejected article to a per-category file in $::DUMPDIR, if
# the filter (via filter_logname) or the LOCAL-posting rule says so.
# $aref is a reference to the article text (passed by reference to
# avoid copying potentially large articles).
# On write failure, $::DUMPDIR is unset to disable further dumping.
sub dumpbadart {
	my ($rejectlog, $msg, $res, $aref, $src) = @_;
	my $fname;
	my $alen = length($$aref);

	# do we dump it anywhere?
	$fname = &filter_logname($res, $src) if (defined &filter_logname);
	# rejected local postings are ALWAYS saved. Always always.
	return unless ($fname || ($src eq "LOCAL"));
	# rejected local postings are marked specially.
	$fname = "local" . ($fname ? '-' . $fname : '') if ($src eq "LOCAL");
	if (!open(ARTDUMP, ">>$::DUMPDIR/$fname")) {
		warn "Cannot open $::DUMPDIR/$fname: $!";
		$::DUMPDIR=();
		return;
	}
	if (!(print ARTDUMP "#! rnews ", $alen, " ", $fname, "\n", $$aref)) {
		warn "Could not write $::DUMPDIR/$fname: $!";
		$::DUMPDIR = ();
		return;
	}
	close ARTDUMP;
}

#
# Deal with a single batchfile. At this level we are shuffling around
# files; munchbatch will eat the spam itself.
# We are passed in the name of the batchfile, the output batch filehandle,
# and the report file handle, in that order. We return munchbatch's
# result (true on success, 0 on error).
# The batch filename is sans directories.
sub dobatch {
	my ($bname, $outbatch, $rejectlog) = @_;
	my ($time, $bfile, $res);

	# perform checks and open.
	$bfile = "$SRCDIR/$bname";
	if (! -f $bfile) {
		print $rejectlog "! $bfile doesn't exist or isn't a file.\n";
		return 0;
	}
	unless (open IBATCH, $bfile) {
		print $rejectlog "! can't open batchfile $bfile: $!\n";
		return 0;
	}
	# It's open, so go:
	$time = localtime;
	($::verbose > 2) && print $rejectlog ": start $bfile at $time\n";
	$res = &munchbatch(*IBATCH{IO}, $outbatch, $rejectlog, $bfile);
	# whatever happens, we want to close it.
	close IBATCH;
	$time = localtime;
	if ($res) {
		# We have succeeded. Log if verbose
		($::verbose > 2) && print $rejectlog ": end $bfile at $time\n";
	} else {
		# There has been an error. Failure has already been logged.
		($::verbose > 2) && print $rejectlog ": bad $bfile at $time\n";
	}
	return $res;
}

##
# Scan the source directory looking for work files. If found, process up
# to so many of them. How many of them is the sole argument. This is the
# major work process, and so it pays attention to $pleasestop, $hupbing,
# and so on.
# In order to gracefully handle being shut down in the middle of life,
# we don't delete the input batch files until we're sure that the output
# batch file has been committed: ie, it has been closed and renamed to the
# permanent name.
sub rundir {
	my ($max) = @_;
	my ($tm, $ent, $time, $rollover, $res, $dest);
	my (@batches, $batch, @handled);
	my ($grade, $ngrade);

	# Get to where we should be.
	chdir $SRCDIR or die "Cannot chdir to $SRCDIR: $!";
	# Begin the scan.
	opendir(BDIR, ".") or die "Cannot opendir $SRCDIR: $!";
	while (defined($ent = readdir(BDIR))) {
		# skip over bad entries.
		$ent eq "." || $ent eq ".." and next;
		# Pick up only valid CNews batchfiles
		$ent !~ /^\d[\d.]+\.t$/ and next;
		push @batches, $ent;
	}
	closedir BDIR;
	# No work to do? return immediately.
	return if (scalar(@batches) == 0);
	# To do this properly, we must now sort the list of batches
	# into lexical (aka arrival time) order.
	@batches = sort @batches;
	# Take the first N of them, if desired. We do this by
	# truncating the list.
	if ($max > 0 && $#batches >= $max) {
		my ($t) = scalar(@batches);
		($::verbose > 4) &&
			print "Have too many batches ($t), truncating to $max of them.\n";
		$#batches = $max-1;
	}
	# We now have some collection of batches.
	# In order to process them, we need to open an output batch
	# and the log.
	open LOG, ">>$LOGFILE" or die "Cannot open $LOGFILE: $!";
	select LOG; $| = 1; select STDOUT;
	# We open under a temporary safe name.
	if ($dryrun) {
		$dest = "/dev/null";	# dryrun doesn't write a real batch
	} else {
		$dest = "$DSTDIR/temp-$$";
	}
	# We always TRUNCATE partial output batches that may be left behind.
	# This can never lose articles (the input batches for a partial output
	# batch won't have been removed) and guarantees us a good batchfile.
	# A lingering destination batchfile might be in pretty much any state.
	open DBATCH, ">$dest" or die "Cannot open destination batch $dest: $!";
	# Run over each batch file.
	$time = localtime;
	($::verbose > 1) && print LOG ": starting run at $time\n";
	foreach $batch (@batches) {
		# Does this input batch have a grade?
		undef $ngrade;
		$ngrade = $1 if ($batch =~ /^(\d\.)/);
		# Does this batch have a different grade than the last batch?
		# If so, we need to force an output batch rollover now.
		$rollover++ if (defined($grade) && $grade ne $ngrade);
		# Has the destination batch file grown big enough to warrant
		# rotation? (Or any of the other reasons to roll it over.)
		# We roll at the top of the loop instead of at the bottom
		# in order to properly handle grades.
		if ($::batchsize <= (stat($dest))[7] ||
		    ($rollover && -s $dest)) {
			close DBATCH or
				die "Error closing destination batch: $dest: $!";
			$tm = time;
			($::verbose > 3) &&
				print "rolling $dest to $DSTDIR/$grade$tm.t due to large size.\n";
			if (!$dryrun) {
				rename($dest, "$DSTDIR/$grade$tm.t") or
					die "Error renaming $dest to $DSTDIR/$tm.t: $!";
			}
			# With the output batchfile flushed and properly
			# renamed, we can now purge all the handled input
			# batches.
			&delbatches(@handled);
			@handled = ();		# null the list.
			# Reopen the batchfile, of course.
			open DBATCH, ">$dest" or
				die "Cannot reopen destination batch $dest: $!";
		}
		$rollover = 0;	# Output batch rolled if necessary.
		# Set the grade of the current batch.
		$grade = $ngrade;
		# Call dobatch.
		($::verbose > 4) && print "Invoking dobatch on $batch\n";
		$res = &dobatch($batch, *DBATCH{IO}, *LOG{IO});
		# Handle success and failure.
		if ($res) {
			# Success: push the done batchfile.
			push @handled, $batch;
		} else {
			# Failure causes an immediate batchfile flush.
			$rollover++;
			# plus saving of the bad batchfile.
			($::verbose > 1) &&
				print LOG ": bad batchfile $batch at $time\n";
			($::verbose > 3) &&
				print "saving bad batchfile $SRCDIR/$batch to $BADDIR/$batch\n";
			if (!$dryrun) {
				$time = localtime;
				rename("$SRCDIR/$batch", "$BADDIR/$batch") or
					print LOG "! failed to rename $SRCDIR/$batch to $BADDIR/$batch: $!\n";
			}
		}
		# Force a flush of data to the outgoing batch file, since
		# we will soon check its size.
		# (We cannot avoid this by using '-s DBATCH'; thanks acres,
		# perl.)
		select DBATCH; $| = 1; select STDOUT;
		select DBATCH; $| = 0; select STDOUT;
		# After each batch is processed we check some important
		# cases. First: shutdown signalled?
		if ($pleasestop || -e $STOPFILE) {
			($::verbose > 1) &&
				print LOG ": ending run on signal or stopfile.\n";
			($::verbose > 3) &&
				print "Ending run due to signal or stopfile.\n";
			# Fall through the bottom of the loop.
			last;
		}
		# Do we want to redo the log file?
		if ($hupbing) {
			($::verbose > 3) &&
				print "reopening $LOGFILE from signal\n";
			close LOG or warn "Error closing $LOGFILE: $!\n";
			open LOG, ">>$LOGFILE" or die "Cannot open $LOGFILE: $!";
			select LOG; $| = 1; select STDOUT;
			$hupbing = 0;
			$rollover++;	# roll the output batch too.
		}
		# batch rollover is handled at the top of the loop, because
		# of grades.
	}
	# Our run has ended. We must close files and roll the destination
	# batch over, if it's non-zero.
	close DBATCH or die "Error closing destination batch $dest: $!";
	if (-z $dest && -f $dest) {
		# We were SO efficient at despamming we deleted EVERYTHING.
		# Remove the destination.
		($::verbose > 3) &&
			print "deleting zero-length destination batch $dest\n";
		if (!$dryrun) {
			unlink($dest) or die "Cannot unlink $dest: $!\n";
		}
	} else {
		$tm = time;
		($::verbose > 3) &&
			print "renaming $dest to $DSTDIR/$grade$tm.t\n";
		if (!$dryrun) {
			rename($dest, "$DSTDIR/$grade$tm.t") or
				die "Error renaming $dest to $DSTDIR/$tm.t: $!\n";
		}
	}
	# Having flushed and successfully renamed the output batchfile, we
	# purge the input batchfiles.
	&delbatches(@handled);
	# Close log and clean up the shop.
	$time = localtime;
	($::verbose > 1) && print LOG ": run ending at $time\n";
	close LOG or warn "Error closing $LOGFILE: $!\n";
	return;
}

# Delete handled batches.
# Remove source batchfiles that have been fully processed and flushed
# to a destination batch.  Takes a list of batch file names relative
# to $SRCDIR.  In dryrun mode nothing is removed.  Removal failures
# are logged (to LOG) but are not fatal: a leftover source batch will
# simply be reprocessed on a later pass.
sub delbatches {
	my (@handled) = @_;
	my ($t);

	while (scalar(@handled)) {
		$t = pop @handled;
		($::verbose > 3) && print "Removing handled batchfile $t\n";
		if (!$dryrun) {
			unlink "$SRCDIR/$t" or
			    print LOG "! Cannot remove source batchfile $SRCDIR/$t: $!\n";
		}
	}
}

# --------------------------------------------------------------------
# Save and reload the EMP history. This is deep friendship with the
# internals of the INN filter we use ... but it's very nice to have
# the history preserved over shutdowns.

# Load the saved EMP history from $::EMPFILE into the filter's
# %::EMP hash and @::EMP_Hist list (one key per line).  Dies if the
# history file cannot be opened; callers only invoke this when the
# file exists and is non-empty.
sub loademphist {
	($::verbose > 3) && print "Loading EMP history from $::EMPFILE.\n";
	open EMPHIST, "<$::EMPFILE" or
		die "Cannot open EMP history $::EMPFILE: $!";
	# BUGFIX: the read loop had lost its filehandle ('while ()'),
	# so it never actually read anything; read each key from EMPHIST.
	while (<EMPHIST>) {
		chomp;
		$::EMP{$_} = -1;	# -1 serves as a convenient 'loaded
					# from narf' indicator, and it's still
					# true in perl.
		push(@::EMP_Hist, $_);
	}
	close EMPHIST;	# if it errors on close, who cares?
}

# Write the filter's current EMP history out to $::EMPFILE so it can
# be reloaded on the next startup.  All failures here are warnings,
# never fatal; losing the EMP history is an inconvenience, not a
# disaster.
sub saveemphist {
	my ($k);

	# Force sync if necessary.
	&filter_sync() if (defined &filter_sync);
	# BUGFIX: 'defined %hash' / 'defined @array' are deprecated and
	# fatal errors in modern perl; a plain truth test (non-empty)
	# expresses the same intent and works everywhere.
	return unless (%::EMP && @::EMP_Hist);
	($::verbose > 3) && print "Saving EMP history to $::EMPFILE.\n";
	if (!(open EMPHIST, ">$::EMPFILE")) {
		warn "Can't open EMP history $::EMPFILE: $!";
		return;
	}
	foreach $k (@::EMP_Hist) {
		if (!(print EMPHIST $k, "\n")) {
			warn "Can't save an EMP key: $!";
			close EMPHIST;
			return;
		}
	}
	# (typo fix: 'historry' -> 'history')
	close EMPHIST or warn "Can't close EMP history $::EMPFILE: $!";
}

## --------------------------------------------
# reset anything as necessary or useful after a filter reload.
# This is also run when we start up.
sub postload_hook {
	# maxartcopy is the lower of $::MAXARTCOPY and $::MaxArtSize if both
	# are defined, or whichever one of them is defined, or isn't defined.
	if ($::MAXARTCOPY && $::MaxArtSize) {
		$maxartcopy = ($::MAXARTCOPY < $::MaxArtSize) ?
			$::MAXARTCOPY : $::MaxArtSize;
	} elsif ($::MaxArtSize) {
		$maxartcopy = $::MaxArtSize;
	} elsif ($::MAXARTCOPY) {
		# filter has set no max but the configuror claims to know
		# better.
		$maxartcopy = $::MAXARTCOPY;
	} else {
		# The filter asserts that every byte is sacred and the
		# installer has not overridden it. Believe it.
		$maxartcopy = 0;
	}
	($::verbose > 3) && do {
		print "MAXARTCOPY ",
		    $maxartcopy ? "wired to $maxartcopy\n" : "off\n";
	};
}

## --------------------------------------------
# Main code.
# There actually isn't much here.
# I guess I did all the work earlier.

# Random geegaws assembled with a chainsaw.
use Getopt::Std;
getopts('V:vDc:m:b:s:d:e:l:w:OP:C:S:E:Xq:N') || &usage(0);

# Handle entirely too many options.
# odd || usage is to shut up perl -w ("used only once" warnings).
$dryrun ++ if ($::opt_D || $::opt_D);
$::verbose ++ if ($::opt_v || $::opt_v);
$runonce++ if ($::opt_O || $::opt_O);
$::xtra = !$::xtra if ($::opt_X || $::opt_X);
$::logngs = !$::logngs if ($::opt_N || $::opt_N);
$::verbose = $::opt_V if (defined $::opt_V);
$::maxbatches = $::opt_c if ($::opt_c);
$::MAXARTSIZE = $::opt_m if (defined $::opt_m);
$::batchsize = $::opt_b if ($::opt_b);
$SRCDIR = $::opt_s if ($::opt_s);
$DSTDIR = $::opt_d if ($::opt_d);
$BADDIR = $::opt_e if ($::opt_e);
$LOGFILE = $::opt_l if ($::opt_l);
$::waitint = $::opt_w if ($::opt_w);
$PIDFILE = $::opt_P if ($::opt_P);
$CONFFILE = $::opt_C if ($::opt_C);
$STOPFILE = $::opt_S if ($::opt_S);
$::EMPFILE = $::opt_E if ($::opt_E);
$::DUMPDIR = $::opt_q if ($::opt_q);

# Enforce that we have, like, no command line arguments. Really.
@ARGV != 0 && usage(0);

# This may not be required, but it's sane.
warn "$0: Warning: -D specified without -O, this may not behave well.\n"
	if ($dryrun && !$runonce);
warn "$0: PID filename '$PIDFILE' not absolute, we hope you know what you're doing.\n"
	unless ($PIDFILE =~ /^\//);

# Insist on a few basic sane things, like the directories existing.
# Probe the configured directories by chdir'ing into each one; any
# missing or unreachable directory is fatal at startup rather than
# mid-run.  NOTE(review): relative directory names would compound
# across these chdirs -- presumably all four are absolute; confirm.
chdir $DSTDIR or
	die "Cannot chdir to batch destination directory $DSTDIR: $!\n";
chdir $BADDIR or
	die "Cannot chdir to bad batch directory $BADDIR: $!\n";
chdir $SRCDIR or
	die "Cannot chdir to batch source directory $SRCDIR: $!\n";
if ($::DUMPDIR) {
	chdir $::DUMPDIR or
		die "Cannot chdir to rejected article directory $::DUMPDIR: $!\n";
} else {
	warn "DUMPDIR not configured; rejected local posts will not be saved.\n";
}

# Sanity checks:
die "Wait interval must be > 0" unless ($::waitint > 0);
die "Batch size must be > 0" unless ($::batchsize > 0);

# Save our process ID for other people to use.
# (Hopefully it's an absolute file name.)
open PID, ">$PIDFILE" or die "Cannot open PID file $PIDFILE: $!";
print PID $$, "\n" or die "Cannot write PID to $PIDFILE: $!";
close PID or die "Cannot close PID file $PIDFILE: $!";

# hi there, fans and fanatics
($::verbose > 3) && print "narf version $::NARFVERSION starting at " .
	localtime, "\n";

my ($omt, $oct, $mt, $ct);	# what I put up with...

# Save the mod time of the filtering script so we know when to reload it.
$omt = (stat("$FILTDIR/filter_innd.pl"))[9];
# Ditto for the 'conf' file.
$oct = (stat($CONFFILE))[9] if (-e $CONFFILE);
# ... and load, if present.
do $CONFFILE if (-e $CONFFILE);

# Initialize filtering.
&filter_mode();		# takes arguments?
&postload_hook();

# Reload our EMP history if we have one.
if (-s $::EMPFILE) {
	&loademphist();
}
# Are we going to be able to save to it?  Probe by opening for append
# (which creates but never truncates).
# BUGFIX: only close the handle when the open actually succeeded; the
# original unconditionally closed EMPHIST even after a failed open.
if (open EMPHIST, ">>$::EMPFILE") {
	close EMPHIST;
} else {
	($::verbose > 3) &&
		print "EMP history file $::EMPFILE unwriteable, no saves available: $!\n";
	undef $::EMPFILE;	# disables all later saveemphist calls.
}

# Now, run forever.
while (!$pleasestop) {
	# Is the filter stale? If so, reload.
	$mt = (stat("$FILTDIR/filter_innd.pl"))[9];
	if ($omt != $mt && -f "$FILTDIR/filter_innd.pl") {
		my $time = localtime;
		($::verbose > 3) &&
			print "Reloading $FILTDIR/filter_innd.pl.\n";
		# Attempt to trap errors: retry until the filter both
		# loads cleanly and defines the entry points we need.
		while (!(do "$FILTDIR/filter_innd.pl") ||
		       !defined &filter_mode || !defined &filter_art) {
			warn "Error reloading filter at $time";
			($::verbose > 3) &&
				print "Filter load error, retrying after $::waitint seconds.\n";
			sleep $::waitint;	# wait, try again.
		};
		&filter_mode();		# takes arguments?
		&postload_hook();
		$omt = $mt;
	}
	# By contrast, we could hardly care less if the conf file exists
	# or loads with errors.
	if (-e $CONFFILE) {
		$ct = (stat($CONFFILE))[9];
		if ($ct != $oct) {
			($::verbose > 3) &&
				print "Loading conf file $CONFFILE.\n";
			do $CONFFILE;
			$oct = $ct;
			# oh, okay, we're nice...
			&postload_hook();
		}
	} else {
		$oct = 0;	# no file forces reload if it reappears
	}

	# Do a run.
	# ... but only if the stop file doesn't exist.
	if (! -e $STOPFILE) {
		($::verbose > 4) &&
			print "Scanning, maxbatches $::maxbatches\n";
		&rundir($::maxbatches);
	} else {
		($::verbose > 4) &&
			print "Stopped due to $STOPFILE existing.\n";
	}
	# A signal asked us to checkpoint the EMP history.
	if ($savestate && $::EMPFILE) {
		($::verbose > 3) &&
			print "Saving EMP history due to signal.\n";
		&saveemphist();
		$savestate = 0;
	}
	# break out if desired.
	last if ($runonce || $pleasestop);
	# Sleep for a while.
	($::verbose > 4) &&
		print "Going to sleep for $::waitint seconds\n";
	sleep $::waitint;
	# repeat forever.
}

# XXX: consistency check for memory leaks.
&consistchk if (defined &consistchk);

# Save the EMP history to date. We know there are no newlines in the
# keys.
if ($::EMPFILE) {
	($::verbose > 3) && print "Saving EMP history on exit\n";
	&saveemphist();
}
($::verbose > 3) && print "Exiting due to shutdown signal.\n";
exit 0;