#!/usr/bin/perl -w
#
# Usenet propagation check.
#
# 1. Ask two WWW::Search backends ('Dejanews' and 'AltaVista::AdvancedNews')
#    for articles posted to @GROUPS between $DAYS_AGO_FROM and $DAYS_AGO_TO
#    days ago, fetch each hit, and record its Message-ID in %id.
# 2. Connect to every NNTP server listed in %NNTP and STAT each collected
#    Message-ID, recording which servers carry the article.
# 3. Print one report line per Message-ID with a column per source
#    ("DJ", "AV", then each %NNTP key); a blank column means "not seen there".

use strict;
use warnings;
$|++;    # unbuffered STDOUT so progress dots appear immediately

use Net::NNTP;
use WWW::Search;
use LWP::Simple;
use URI;

## CONFIG ##

my $NOISY  = 1;    # true: print progress information while working
my @GROUPS = qw(comp.lang.perl.misc rec.humor.funny pdx.general comp.risks);

# Date window, expressed as "days before now".
my $DAYS_AGO_FROM = 21;
my $DAYS_AGO_TO   = 19;

# NNTP servers to probe, keyed by the short label used as the report column.
#   host   - passed to Net::NNTP->new
#   tunnel - optional shell command run (via system) before connecting
#   user / pass - optional; when user is set, authinfo is sent after connecting
my %NNTP = (
    'in' => { host => 'news.inetXXXXX.com' },
    'te' => {
        host   => 'localhost:1191',
        tunnel => 'ssh -f -q -L 1191:news.teleXXXX.com:119 teleXXXX.com sleep 180',
    },
    'ag' => {
        host   => 'localhost:1190',
        tunnel => 'ssh -f -q -L 1190:herXXX.rXXXX.com:119 agXXX.rXXXX.com sleep 180',
    },
);

## END CONFIG ##

# %id maps Message-ID => { source label => hit count }, where the source
# label is "DJ", "AV", or a key of %NNTP.
my %id;

my $FROM = days_ago_to_deja_date($DAYS_AGO_FROM);
my $TO   = days_ago_to_deja_date($DAYS_AGO_TO);

## deja phase
{
    my %seen;    # article numbers already fetched, to skip duplicate hits

    my $search = WWW::Search->new('Dejanews');
    $search->native_query(
        "",
        {
            groups   => join(',', @GROUPS),
            fromdate => $FROM,
            todate   => $TO,
        }
    );
    $search->maximum_to_retrieve(10000);

    print "Deja: " if $NOISY;
    while (my $result = $search->next_result) {
        my $uri   = URI->new($result->url);
        my %query = $uri->query_form;
        next unless exists $query{AN};
        print "." if $NOISY;

        # Keep only the leading run of digits of the AN parameter; skip the
        # hit when there is none rather than building a URL from undef.
        my ($an) = defined $query{AN} ? $query{AN} =~ /(\d+)/ : ();
        next unless defined $an;
        next if $seen{$an}++;

        # NOTE(review): "AN=<n>.1" with "fmt=raw" presumably asks the service
        # for the unformatted article text -- confirm against the service.
        $uri->query_form(AN => "$an.1", fmt => 'raw');

        my $message_id = fetch_message_id("$uri");
        next unless defined $message_id;
        $id{$message_id}{DJ}++;
    }
    print "\n" if $NOISY;
}

## alta phase
{
    my $search = WWW::Search->new('AltaVista::AdvancedNews');
    $search->native_query(
        join(" OR ", map "newsgroups:$_", @GROUPS),
        {
            d0 => days_ago_to_alta_date($DAYS_AGO_FROM),
            d1 => days_ago_to_alta_date($DAYS_AGO_TO),
        }
    );
    $search->maximum_to_retrieve(10000);

    print "Alta: " if $NOISY;
    while (my $result = $search->next_result) {
        my $url = $result->url;
        print "." if $NOISY;

        # NOTE(review): the "plain@" form presumably selects a plain-text
        # rendering of the article -- confirm against the service.
        $url =~ s/news\?msg/news?plain\@msg/;

        my $message_id = fetch_message_id($url);
        next unless defined $message_id;
        $id{$message_id}{AV}++;
    }
    print "\n" if $NOISY;
}

# Order the Message-IDs by the part after the last "@", then the part before
# it, then the whole string (IDs without an "@" sort first on the first two
# keys).  The keys are computed once per ID and discarded after sorting.
my @msg_id =
    map  { $_->[0] }
    sort { $a->[2] cmp $b->[2] or $a->[1] cmp $b->[1] or $a->[0] cmp $b->[0] }
    map  { /(.*)\@(.*)/ ? [$_, $1, $2] : [$_, "", ""] }
    keys %id;

## nntp phase
for my $short_host (sort keys %NNTP) {
    my %INFO = %{ $NNTP{$short_host} };

    if (my $tun = $INFO{tunnel}) {
        print "launching $tun\n" if $NOISY;

        # Report a failed launch but still try to connect: an earlier
        # tunnel on the same port may still be alive.
        system($tun) == 0
            or warn "tunnel command for $short_host failed (status $?)\n";
    }

    my $c = Net::NNTP->new($INFO{host});
    unless (defined $c) {
        warn "cannot connect to $short_host, skipping\n";
        next;
    }

    if ($INFO{user}) {
        $c->authinfo($INFO{user}, $INFO{pass})
            or warn "authentication at $short_host failed\n";
    }

    for my $msg_id (@msg_id) {
        print "$msg_id at $short_host: " if $NOISY;
        if ($c->nntpstat($msg_id)) {
            $id{$msg_id}{$short_host}++;
            print "yes" if $NOISY;
        }
        else {
            print "no" if $NOISY;
        }
        print "\n" if $NOISY;
    }

    # End the session explicitly instead of leaving it to object destruction.
    $c->quit;
}

## report
my @hosts = sort keys %NNTP;
print "report from $FROM to $TO for @GROUPS\n";
for my $msg_id (@msg_id) {
    for my $host ("DJ", "AV", @hosts) {

        # Pad the "absent" cell to the label's width so columns line up.
        my $cell = $id{$msg_id}{$host} ? $host : " " x length($host);
        print "$cell ";
    }
    print "$msg_id\n";
}

## subroutines

# fetch_message_id($url)
# Fetch $url with LWP::Simple::get and return the value of the first
# "Message-ID:" line found in the content (header name matched without
# regard to case, surrounding whitespace trimmed).  Returns undef when the
# fetch fails or no such line is present.
sub fetch_message_id {
    my ($url) = @_;
    my $article = get($url);
    return undef
        unless defined $article
        and $article =~ /^Message-ID:\s+(.*\S)\s*$/mi;
    return $1;
}

# days_ago_to_deja_date($days)
# Return the GMT date $days days before now, formatted "MM/DD/YYYY".
sub days_ago_to_deja_date {
    my ($days) = @_;
    my ($mday, $mon, $year) = (gmtime(time - 86400 * $days))[3, 4, 5];
    return sprintf "%02d/%02d/%04d", 1 + $mon, $mday, 1900 + $year;
}

# days_ago_to_alta_date($days)
# Return the GMT date $days days before now, formatted "DD-MM-YYYY".
sub days_ago_to_alta_date {
    my ($days) = @_;
    my ($mday, $mon, $year) = (gmtime(time - 86400 * $days))[3, 4, 5];
    return sprintf "%02d-%02d-%04d", $mday, 1 + $mon, 1900 + $year;
}