#!/usr/bin/perl $version = '$Id: modcount,v 0.2 1996/11/14 05:28:07 eagle Exp $'; # # modcount -- Do statistical analysis of the breakdown of moderated groups. # Copyright 1996 Russ Allbery # # This program is free software; you can redistribute it and/or modify it # under the same terms as Perl itself. # # Syntax: # modcount [-8 | -h ] # # is a comma-separated list of top-level hierarchies to # check (no spaces). is the file name of an active file. -8 # specifies that the Big Eight should be checked. # # Edit the database at the end of this script to change how groups are # classified. require 5.001; use strict; use vars qw($version @out); # Parse the command line and determine the extent of our check. sub parse_opts { my @hierarchies; if ($ARGV[0] eq '-8') { @hierarchies = ('comp', 'humanities', 'misc', 'news', 'rec', 'sci', 'soc', 'talk'); shift @ARGV; } elsif ($ARGV[0] eq '-h') { shift @ARGV; @hierarchies = split (/,/, shift @ARGV) or die "$0: -h requires an argument\n"; } my $active = shift @ARGV or die "$0: no active file specified\n"; if (@hierarchies) { my $regex = '^(' . join ('|', @hierarchies) . ')\.'; return ($active, $regex); } else { return $active; } } # Parse the database at the end of the script. rule specifies # that all groups matching that pattern are non-discussion groups. except # d means that that particular group is a discussion group. except # a (or anything other than d) says that that group is a # non-discussion group. Anything not matched by an exception or a rule is # considered a discussion group. sub parse_data { my ($rules, $exceptions) = @_; while () { my @rule = split; if ($rule[0] eq 'rule') { push (@$rules, $rule[1]); } elsif ($rule[0] eq 'except') { $exceptions->{$rule[1]} = ($rule[2] =~ /^d/) ? 0 : 1; } } } # Main routine, which opens and parses the active file and builds a list of # groups placed into each category, as well as creating statistics for each # top-level hierarchy. { my ($active, $regex) = &parse_opts; my (@rules, %exceptions); &parse_data (\@rules, \%exceptions); my (%stats, @discuss, @nondiscuss); open (ACTIVE, $active) or die "cannot open active file $active: $!\n"; GROUP: while () { my @fields = split; if ($regex) { next unless $fields[0] =~ /$regex/o } my @components = split (/\./, $fields[0]); $stats{$components[0]}->[($fields[3] eq 'm') ? 1 : 0]++; next unless ($fields[3] eq 'm'); if (defined $exceptions{$fields[0]}) { if ($exceptions{$fields[0]}) { $stats{$components[0]}->[2]++; push (@nondiscuss, $fields[0]); } else { $stats{$components[0]}->[3]++; push (@discuss, $fields[0]); } } else { my $rule; foreach $rule (@rules) { if ($fields[0] =~ /\Q$rule/) { $stats{$components[0]}->[2]++; push (@nondiscuss, $fields[0]); next GROUP; } } $stats{$components[0]}->[3]++; push (@discuss, $fields[0]); } } my (@totals, $hierarchy); foreach $hierarchy (sort keys %stats) { my $total = $stats{$hierarchy}->[0] + $stats{$hierarchy}->[1]; @out = ($hierarchy, $stats{$hierarchy}->[0], sprintf ("%.1f", $stats{$hierarchy}->[0] / $total * 100), $stats{$hierarchy}->[2], sprintf ("%.1f", $stats{$hierarchy}->[2] / $total * 100), $stats{$hierarchy}->[3], sprintf ("%.1f", $stats{$hierarchy}->[3] / $total * 100)); write; $-++; $totals[0] += $total; $totals[1] += $out[1]; $totals[2] += $out[3]; $totals[3] += $out[5]; } print "\n"; @out = ('TOTALS', $totals[1], sprintf ("%.1f", $totals[1] / $totals[0] * 100), $totals[2], sprintf ("%.1f", $totals[2] / $totals[0] * 100), $totals[3], sprintf ("%.1f", $totals[3] / $totals[0] * 100)); write; print '-' x 67 . "\n\nNon-discussion groups:\n\n"; print join ("\n", sort @nondiscuss); print "\n\nDiscussion groups:\n\n"; print join ("\n", sort @discuss); } # Output formats to get a nice table listing. format STDOUT_TOP = Hierarchy unmod non-discuss discuss ------------------------------------------------------------------- . format STDOUT = @<<<<<<<<<<<< @>>> - @>>>>% @>>> - @>>>>% @>>> - @>>>>% $out[0], $out[1], $out[2], $out[3], $out[4], $out[5], $out[6] . __END__ rule .announce rule .answers rule .info rule .binaries. rule .sources. rule .reviews rule .creative rule .stories rule clari. except comp.ai.jair.papers n except comp.ai.nlang-know-rep n except comp.ai.vision n except comp.archives n except comp.bugs.4bsd.ucb-fixes n except comp.doc.techreports n except comp.home.misc n except comp.infosystems.kiosks d except comp.infosystems.www.authoring.cgi d except comp.internet.library n except comp.internet.net-happenings n except comp.lang.sigplan n except comp.laser-printers n except comp.mail.maps n except comp.networks.noctools.tools n except comp.newprod n except comp.org.eff.news n except comp.org.fidonet n except comp.patents n except comp.protocols.iso.x400.gateway n except comp.research.japan n except comp.risks n except comp.society n except comp.society.cu-digest n except comp.std.lisp n except comp.std.unix n except comp.sys.concurrent n except comp.sys.ibm.pc.digest n except comp.sys.m68k.pc n except comp.sys.mac.digest n except comp.theory.info-retrieval d except misc.activism.progressive n except misc.education.multimedia n except misc.health.injuries.rsi.moderated n except misc.news.bosnia n except misc.news.east-europe.rferl n except misc.news.southasia n except misc.test.moderated n except news.lists n except rec.arts.erotica n except rec.aviation.questions n except rec.boats.marketplace n except rec.food.recipes n except rec.games.frp.archives n except rec.humor.funny n except rec.humor.oracle n except rec.music.promotional n except rec.sport.baseball.analysis n except sci.astro.hubble n except sci.finance.abstracts n except sci.psychology.journals.psyche n except sci.psychology.journals.psycoloquy n except sci.space.news n except soc.politics n except soc.politics.arms-d n