#!/usr/bin/perl
$ID = q$Id: multilog-watch,v 1.12 2004/12/09 03:04:08 eagle Exp $;
#
# multilog-watch -- Watches a multilog file for irregularities.
#
# Written by Russ Allbery <rra@stanford.edu>
# Copyright 2001, 2002, 2004 Board of Trustees, Leland Stanford Jr. University
#
# This program is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.

##############################################################################
# Site configuration
##############################################################################

# Default directory for configuration files.
$CONFIGDIR = '/etc/leland';

# Path to the qmail-remote binary.
$QREMOTE = '/var/qmail/bin/qmail-remote';

##############################################################################
# Modules and declarations
##############################################################################

require 5.005;

use strict;
use vars qw(%CONFIG $CONFIGDIR $ID $QREMOTE);

use Getopt::Long qw(GetOptions);
use POSIX qw(strftime);

##############################################################################
# Configuration parsing
##############################################################################

# Parse a configuration file and fill out the %CONFIG hash.  Also make sure
# that all the required configuration parameters are set.
sub config_parse {
    my $file = shift;
    open (CONFIG, $file) or die "$0: cannot open $file: $!\n";
    $CONFIG{ignore} = [];
    while (<CONFIG>) {
        chomp;
        next if /^\s*\#/;
        next if /^\s*$/;
        if (/^\s*(\S+)\s*=\s*(.*)/) {
            my ($param, $value) = ($1, $2);
            $param = lc $param;
            unless ($value) {
                warn "$0: parse error on line $. of $file: $!\n";
            }
            $CONFIG{$param} = $value;
        } elsif (m%^\s*/(.*)/\s*$%) {
            push (@{ $CONFIG{ignore} }, qr/$1/);
        } else {
            warn "$0: parse error on line $. of $file: $!\n";
        }
    }
    close CONFIG;
    for (qw/alert dir sender server subject/) {
        unless ($CONFIG{$_}) {
            warn "$0: paramter $_ not set in $file\n";
        }
    }
}

##############################################################################
# Time parsing
##############################################################################

# Converts a TAI64N timestamp to fractional seconds since epoch.  Returns
# undef on any error.
sub tai64n_decode {
    my $timestamp = shift;
    $timestamp =~ s/^\@// or return;

    # Convenience for multilog files, so that one doesn't have to strip off
    # just the timestamp before passing the file to this sub.
    $timestamp =~ s/\.[us]$//;

    # Reject invalid timestamps.
    return unless $timestamp =~ /^[a-f0-9]{24}$/;

    # We cheat and don't handle the full range of TAI.  Instead, pull off the
    # initial 2^62 and the remainder will be seconds since epoch for more
    # years than I care about.
    my ($seconds, $nanoseconds) = ($timestamp =~ /^(.{16})(.{8})$/);
    return unless defined ($seconds) && defined ($nanoseconds);
    $seconds =~ s/^40+//;
    my $time = hex ($seconds) + (hex ($nanoseconds) / 1e9);

    # The TAI epoch is ten seconds later than the UTC epoch due to initial
    # leap seconds, so adjust here.  This is the simple thing to do and works
    # on systems that keep UTC in conjunction with multilog installations that
    # have no leapseconds configuration.  In any more sophisticated TAI time
    # installation, this will lose, but I don't have any such system and
    # therefore haven't figured out the right thing to do.
    $time -= 10;
    return $time;
}

# Convert fractional seconds since epoch to a human-readable time.
sub frac_local {
    my $timestamp = shift;
    $timestamp =~ s/\.(\d+)$//;
    my $fraction = $1;
    my $time = strftime ("%Y-%m-%d %T", localtime $timestamp) . ".$fraction";
    return $time;
}

##############################################################################
# Implementation
##############################################################################

# Clean up $0 for error reporting.
my $fullpath = $0;
$0 =~ s%^.*/%%;

# Parse command-line options.
my ($help, $version);
Getopt::Long::config ('bundling', 'no_ignore_case');
GetOptions ('help|h' => \$help, 'version|v' => \$version) or exit 1;
if ($help) {
    print "Feeding myself to perldoc, please wait....\n";
    exec ('perldoc', '-t', $fullpath) or die "$0: can't fork: $!\n";
} elsif ($version) {
    my $version = join (' ', (split (' ', $ID))[1..3]);
    $version =~ s/,v\b//;
    $version =~ s/(\S+)$/($1)/;
    print $version, "\n";
    exit 0;
}

# The path to the config file is the only argument.
my $config = shift or die "$0: no config file specified\n";
$config = $CONFIGDIR . '/' . $config unless $config =~ m%^/%;
config_parse $config;
my $checkpoint = "$CONFIG{dir}.last";

# Grab the timestamp of the last time we looked at the logs, if available.
my $lastcheck = 0;
if (open (CP, $checkpoint)) {
    # Skip the first line; it's a comment.
    <CP>;
    $lastcheck = <CP>;
    close CP;
    chomp $lastcheck;
    if ($lastcheck !~ /^\d+\.\d+$/) {
        warn "$0: invalid timestamp in $checkpoint: $lastcheck\n";
        $lastcheck = 0;
    }
}

# Now, scan the directory looking for timestamp files.  Grab any that are old
# log files and whose end date post-dates our last check time.  Always scan
# current.
opendir (LOGS, $CONFIG{dir}) or die "$0: cannot open $CONFIG{dir}: $!\n";
my @files = grep { /^\@/ && tai64n_decode ($_) > $lastcheck } readdir LOGS;
push (@files, 'current');
closedir LOGS;

# Now, process each file.  We spit our output out through a direct pipe to
# qmail-remote if we have any to avoid any breakage in the local mail queue.
my ($checking, $sending, $timestamp) = (0, 0, '');
@ARGV = map { "$CONFIG{dir}/$_" } @files;
LINE: while (<>) {
    s/^(^\@[a-f0-9]+) // or next;
    $timestamp = $1;
    if (!$checking && tai64n_decode ($timestamp) > $lastcheck) {
        $checking = 1;
    }
    next unless $checking;
    for my $regex (@{ $CONFIG{ignore} }) {
        next LINE if /$regex/;
    }
    unless ($sending) {
        my $command = "$QREMOTE $CONFIG{server} $CONFIG{sender}"
            . " $CONFIG{alert}";
        open (MAIL, "| $command >/dev/null 2>&1")
            or die "$0: unable to fork qmail-remote: $!\n";
        print MAIL "From: $CONFIG{sender}\nTo: $CONFIG{alert}\n";
        print MAIL "Subject: $CONFIG{subject}\n\n";
        $sending = 1;
    }
    print MAIL frac_local (tai64n_decode ($timestamp)), ' ', $_;
}
if ($sending) {
    close MAIL;
    die ("$0: qmail-remote exited with status", ($? >> 8), "\n") if $? != 0;
}
if ($checking) {
    open (CP, "> $checkpoint") or die "$0: cannot open $checkpoint: $!\n";
    print CP "# Last check time generated automatically by multilog-watch.\n";
    print CP tai64n_decode ($timestamp), "\n";
    close CP or die "$0: cannot flush $checkpoint: $!\n";
}

##############################################################################
# Documentation
##############################################################################

=head1 NAME

multilog-watch - Filters a syslog file and mails the results

=head1 SYNOPSIS

multilog-watch [B<-hv>] I<config>

=head1 DESCRIPTION

B<multilog-watch> parses the logs in a multilog(1) directory, picking up
where the last invocation left off, and filters out all of the boring lines
as configured in I<config>.  It then mails any remaining lines, if any, to
the address specified in I<config>.  The mail is sent using qmail-remote to
bypass any problems with the mail system on the host, which means that this
program will only work on a system running qmail.

If I<config> isn't an absolute path, it's taken to be relative to
/etc/leland.

The TAI64N timestamps in the multilog log files are converted to an
ISO-style human-readable form for the mail message.

=head1 OPTIONS

=over 4

=item B<-h>, B<--help>

Print out this documentation (which is done simply by feeding the script to
C<perldoc -t>).

=item B<-v>, B<--version>

Print the version of B<filter-syslog> and exit.

=back

=head1 CONFIGURATION FILE

There are two types of valid lines in the configuration file; variable
settings, and filter patterns.  A variable setting looks like:

    variable = value

where I<value> can contain whitespace (but can't begin with whitespace).  A
filter pattern looks like:

    /regex/

where I<regex> is a regular expression matching lines that are "boring" and
shouldn't be reported (sans the initial timestamp).

The following variables are recognized:

=over 4

=item alert

The address to which to mail the filtering results.  No mail will be sent if
all of the input lines are filtered out by the regexes provided.  This
variable must be set.

=item dir

The multilog directory to filter.  Note that the parent directory has to be
writable by the user running B<multilog-watch>, so that it can save its
timestamp for the last time the log was checked.  This variable must be set.

=item sender

The address from which to mail the filtering results (used for the envelope
sender and the To: header).  This variable must be set.

=item server

The server through which to send mail with qmail-remote.  This server should
be fairly reliable and rarely down, since if it is the mail may be lost.
This variable must be set.

=item subject

The value to use for the Subject: header of the filtering results.  This
variable must be set.

=back

If there are any input lines that don't match one of the filter rules, they
will be mailed to the value of I<alert> with a subject given by I<subject>.

=head1 EXAMPLES

Use the configuration file /etc/leland/qmail.filter and filter the log
directory given in it:

    multilog-watch /etc/leland/qmail.filter

Here's a sample configuration file that filters out normal qmail messages
from the logs in /var/log/qmail and sends the result to root@example.com
with a Subject: header of "example syslog filter results", using
smtp.example.com as the sending server.  The return address will also be
root@example.com

    dir = /var/log/qmail
    alert = root@example.com
    sender = root@example.com
    server = smtp.example.com
    subject = example syslog filter results

    /^new msg \d+$/
    /^info msg \d+:/
    /^starting delivery \d+:/
    /^status:/
    /^delivery \d+: success:/
    /^end msg \d+$/

This particular configuration would mail to root@example.com every message
related to an unsuccessful mail delivery.

=head1 FILES

=over 4

=item F</etc/leland>

If the configuration file given on the command line isn't an absolute path,
it is looked for in this directory.  This default can be changed at the top
of this program.

=back

B<multilog-watch> creates a file named I<dir>.last where I<dir> is the name
of the directory that it's filtering, containing the timestamp of the last
successful filter run.  It reads this file if its present and ignores any
log messages before that time.  This is both an optimization to keep from
having to reparse logs and a way to keep from sending duplicate reports
about abnormal messages.

=head1 BUGS

B<multilog-watch> makes no attempt to handle leap seconds or to handle TAI
to UTC conversion correctly.  It was written with hard-coded adjustments for
systems where the system clock is in UTC and no leap seconds file is
present.  In that situation, software generating TAI64N timestamps will
generate timestamps with an epoch 10 seconds different than UTC, which
B<multilog-watch> adjusts for.

This is obviously not the correct approach, which would be to use logic
analogous to what libtai does.  It was just the expedient thing to do for my
immediate problem.

=head1 SEE ALSO

See L<http://cr.yp.to/daemontools.html> for information on multilog and the
rest of the daemontools package.

The current version of this program is available from its web page at
L<http://www.eyrie.org/~eagle/software/multilog-watch/>.

=head1 AUTHOR

Russ Allbery <rra@stanford.edu>

=head1 COPYRIGHT AND LICENSE

Copyright 2001, 2002, 2004 Board of Trustees, Leland Stanford Jr. University.

This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.

=cut