#!/usr/bin/perl our $ID = q$Id: nan-archive,v 1.15 2009-01-02 21:59:40 eagle Exp $; # # nan-archive -- Archive posts to news.announce.newgroups. # # Copyright 2003, 2008 Russ Allbery # # This program is free software; you may redistribute it and/or modify it # under the same terms as Perl itself. ############################################################################## # Local customization ############################################################################## # The path to the news.announce.newgroups archive. our $ARCHIVE = '/srv/nan/archive/archive'; # The e-mail address to which to report errors. our $ERRORS = 'rra@stanford.edu'; # The path to the keyring used for PGPMoose verification. our $KEYRING = '/srv/nan/archive/keyring'; # The log file into which to record actions taken. The current year and # month will be appended. our $LOGBASE = '/srv/nan/archive/logs/log'; our $LOG; # The PGPMoose signing identity. our $SIGNER = 'Moderator of news.announce.newgroups '; ############################################################################## # Modules and declarations ############################################################################## require 5.006; use strict; use subs qw(log); use Getopt::Long qw(GetOptions); use Fcntl qw(LOCK_EX); use IO::Handle (); use News::Article; use PGP::Sign (); use POSIX qw(strftime); ############################################################################## # Error reporting ############################################################################## # Open the log file. sub openlog { $LOG = $LOGBASE . '.' . strftime ('%Y-%m', gmtime); open (LOG, ">> $LOG") or die "$0: can't open $LOG: $!\n"; } # Log a message to the log file. sub log { my $log = $LOGBASE . '.' . strftime ('%Y-%m', gmtime); if ($log ne $LOG) { close LOG; openlog; } my $date = strftime ('%Y-%m-%d %T', gmtime); print LOG "$date [$$] ", @_, "\n"; LOG->flush; } # Log a fatal message and then exit. sub logdie { log ('FATAL: ', @_); exit 1; } # Report an error via e-mail and then exit. This should not cause the article # to be retried, since it was still received and saved successfully, but it # won't be deleted out of the queue. sub fail { my ($id, $message) = @_; my $sendmail = grep { -x $_ } qw(/usr/sbin/sendmail /usr/lib/sendmail); $sendmail ||= 'sendmail'; open (MAIL, "|$sendmail -oi -oem $ERRORS") or die "$0: cannot fork sendmail: $!\n"; print MAIL "To: $ERRORS\n"; print MAIL "From: news.announce.newgroups Archiver <$ERRORS>\n"; print MAIL "Subject: Error while processing $id\n"; print MAIL "\n"; print MAIL "Encountered the following error:\n\n"; print MAIL " $message\n\n"; print MAIL "while processing $id\n"; close MAIL; if ($? == 0) { logdie $message; } else { log ('FATAL: ', $message); logdie 'cannot send mail, sendmail exit status ', ($? >> 8), "\n"; } } ############################################################################## # Message parsing ############################################################################## # Parse an article into a hash of headers and an array containing the message. # Takes the file handle to the article, its message ID, an array ref into # which to put the message, and a hash ref into which to put parsed headers. sub parse_article { my ($fh, $id, $message, $hdr) = @_; # Parse the headers first, checking for duplicates as we go. my $last; local $_; while (<$fh>) { s/\r\n\z/\n/; push (@$message, $_); last if ($_ eq "\n"); if (/^(\S+):\s+(.+)/) { $last = lc $1; if (exists $$hdr{$last}) { $$hdr{$last} .= ' ' . $2; } else { $$hdr{$last} = $2; } } elsif (defined ($last) && /^(\s.+)/) { $$hdr{$last} .= $1; } else { log "$id broken headers"; return 0; } } # Check the validity of the message. unless (@$message) { log "$id appears to be empty"; return 0; } unless ($$hdr{'archive-name'}) { log "$id has no Archive-Name header"; return 0; } if ($$hdr{'archive-name'} !~ m%^([A-Za-z][\w.]+/)?[a-z0-9+.-]+\s*$%) { log "$id has an invalid Archive-Name header"; return 0; } # Slurp up the article body. while (<$fh>) { s/\r\n\z/\n/; last if ($_ eq ".\n"); s/^\.\././; push (@$message, $_); } # Check the PGPMoose signature. my $article = News::Article->new ($message); unless ($article) { log "$id could not be parsed"; return 0; } $PGP::Sign::PGPPATH = $KEYRING; my $signer = $article->verify_pgpmoose ('news.announce.newgroups'); unless ($signer && $signer eq $SIGNER) { if (not defined $signer) { log "$id has an invalid signature"; } else { $signer =~ s/\n/_/g; log "$id has an invalid signature from $signer"; } return 0; } return 1; } # Given an Archive-Name header, turn that into a pathname relative to the root # of the archive. sub parse_archive { my ($archive) = @_; $archive =~ s/\s+$//; if ($archive =~ m%/%) { return $archive; } else { my ($hierarchy) = split (/\./, $archive, 2); $hierarchy = 'other.articles' unless $hierarchy; return "$hierarchy/$archive"; } } ############################################################################## # Message archiving ############################################################################## # Archive a message. Takes the file into which to archive the message and the # complete message as a reference to an array. Uses a lock file at the top of # the archive directory to prevent any contention. sub archive { my ($file, $id, $article) = @_; fail ($id, "bad archive name $file") if (!$file || $file =~ m%^\.|^-|^/|\s%); fail ($id, "bad archive name $file") if (($file =~ tr%/%/%) != 1); $file = "$ARCHIVE/$file"; my $dir = $file; $dir =~ s%/[^/]+$%%; fail ($id, "directory $dir does not exist") unless -d $dir; my $message = join ('', @$article); $message = 'From usenet ' . scalar (gmtime) . "\n" . $message; my $umask = umask 002; open (LOCK, "+> $ARCHIVE/.lock") or fail ($id, "cannot open lock file $ARCHIVE/.lock: $!"); flock (LOCK, LOCK_EX) or fail ($id, "cannot lock $ARCHIVE/.lock: $!"); open (FILE, ">> $file") or fail ($id, "cannot open $file: $!"); print FILE "\n" if -s $file; print FILE $message or fail ($id, "cannot write to $file: $!"); close FILE or fail ($id, "cannot write to $file: $!"); close LOCK; umask $umask; } ############################################################################## # Main routine ############################################################################## # Trim extraneous garbage from the path. $| = 1; my $fullpath = $0; $0 =~ s%.*/%%; # Parse command-line options. my ($help, $version); Getopt::Long::config ('bundling'); GetOptions ('h|help' => \$help, 'v|version' => \$version) or exit 1; if ($help) { print "Feeding myself to perldoc, please wait....\n"; exec ('perldoc', '-t', $fullpath); } elsif ($version) { my $version = join (' ', (split (' ', $ID))[1..3]); $version =~ s/,v\b//; $version =~ s/(\S+)$/($1)/; $version =~ tr%/%-%; print $version, "\n"; exit; } # Get permissions right. umask 007; # Open our log file. openlog; # Processing loop. If file names were specified on the command-line, process # that single file. Otherwise, we listen on stdin and get file names and # message IDs, one set per line, separated by spaces. my ($file, $id); if (@ARGV) { for my $file (@ARGV) { my $id = $file; # Parse the article. open (ART, $file) or fail ($id, "cannot open article $file: $!"); my (@message, %hdr); next unless parse_article (\*ART, $id, \@message, \%hdr); # Archive the article. my $archive = parse_archive ($hdr{'archive-name'}); archive ($archive, $id, \@message); log "$id archived (manual)"; } } else { while () { ($file, $id) = split; # Parse the article. open (ART, $file) or fail ($id, "cannot open article $file: $!"); my (@message, %hdr); next unless parse_article (\*ART, $id, \@message, \%hdr); # Archive the article. my $archive = parse_archive ($hdr{'archive-name'}); archive ($archive, $id, \@message); log "$id archived"; } continue { unlink $file or logdie "cannot unlink article $file: $!"; } } ############################################################################## # Documentation ############################################################################## =head1 NAME nan-archive - Archive posts to news.announce.newgroups =head1 SYNOPSIS B [I ...] =head1 REQUIREMENTS Perl 5.6.0 or later and the Perl modules News::Article and PGP::Sign, both of which are available from CPAN. =head1 DESCRIPTION B processes messages posted to news.announce.newgroups, verifying the article's PGPMoose signature and then archiving it according to its Archive-Name header. All actions are logged to a log file. It expects to receive on standard input file names and message IDs, one pair of file name and message ID per line. The message ID is optional, but is used for logging until the message has been parsed and therefore omitting it may result in log messages that have no associated ID. The file is deleted when it has been successfully processed. Optionally, instead, one or more files may be specified on the command line. In this case, they'll be processed as above, but they'll be noted as manually processed in the logs. The Archive-Name header field must be in one of three formats: the name of a newsgroup (for articles affecting that group); a path formed from a directory, a slash, and a filename; or some other filename, which will be stored in the C directory. If the Archive-Name header field contains only a newsgroup name, it will be saved in a directory named after the top-level hierarchy of that newsgroup name. =head1 FILES =over 4 =item F The archive area for all messages. The top level should just be directories, and all messages will be archived in those directories according to the naming scheme described above. The archived messages will be stored in mbox format. The archive is locked against multiple simultaneous writers by using fcntl locking on a file named C<.lock> at the top level. =item F The keyring used to verify messages using the PGPMoose protocol. All messages must be signed by a key stored in this keyring with a key ID matching the one configured at the top of this script. =item F Where actions are logged. %Y is replaced by the current four-digit year and %m by the current two digit month. =back =head1 NOTES B should be run for each message posted to news.announce.newgroups. Its input format is designed to work with the B server that will come with INN 2.5 and later and is available from INN's Subversion repository, but it should also work as a channel feed from pre-storage-API versions of INN (1.x). It will not work without modification via a channel feed from a current version of INN, since it doesn't understand the storage API and doesn't know how to retrieve articles by tokens. To run B with B, create C, C, and C directories and the GnuPG keyring as described above. Then, run B on some port, configuring it to use the C directory and run B. A typical B command line would be: tinyleaf /srv/nan-archive/spool /srv/nan-archive/scripts/nan-archive I run B using B (from the ucspi-tcp package), but B should work equally well. Then, set up a feed of news.announce.newgroups to the system running B. =head1 SEE ALSO inetd(8), tcpserver(1), tinyleaf(8) L will have the latest version of this script. =head1 AUTHOR Russ Allbery =head1 COPYRIGHT AND LICENSE Copyright 2003, 2008 Russ Allbery This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. =cut