#!/usr/bin/perl -w
#
# Build a static HTML page of CNN Headline News items, grouped by state.
#
# Each run:
#   1. mirrors $CNN_URL into the local flat file $CNN_CACHE;
#   2. parses that file with CGI.pm and records every headlineN/stateN pair
#      in the DBM file $DB_MEMORY (value: "first_seen last_seen", both
#      epoch seconds);
#   3. drops entries whose last_seen is older than --expire minutes;
#   4. writes the report to "<output>~" and renames it over <output>, so
#      readers never see a half-written page.
#
# Options:
#   --refresh=MIN   meta-refresh interval of the generated page (default 10)
#   --output=FILE   page to (re)generate (default index.html)
#   --expire=MIN    how long an unseen headline is remembered (default 1440)
#   --clear         forget everything remembered so far before updating

use strict;
$|++;

## begin config
my $CNN_URL   = "http://headlinenews.cnn.com/QUICKNEWS/virtual/swf.headline.txt";
my $CNN_CACHE = "contest.cnn-cache";    # flat file
my $DB_MEMORY = "contest.memory";       # dbmopen
## end config

use CGI qw(:all -no_debug col thead tbody);
use LWP::Simple qw(mirror);
use Getopt::Long;

GetOptions(
    "refresh=i" => \(my $REFRESH = 10),            # meta refresh time in minutes
    "output=s"  => \(my $OUTPUT  = "index.html"),  # output file
    "expire=i"  => \(my $EXPIRE  = 1440),          # expire time in minutes
    "clear!"    => \(my $CLEAR   = 0),             # clear the cache
    # Any non-option argument is an error: bump Getopt::Long's error count
    # so that GetOptions returns false and we die below.
    "<>" => sub { $Getopt::Long::error++; warn "Unknown arg: $_[0]\n" },
) or die "see code for usage\n";

# Names used for the navigation bar at the top of the table.  Kept as an
# explicit list (rather than a here-doc split on newlines) so the entries
# cannot be fused together if the file's whitespace is ever reflowed.
my @STATES = (
    'ALABAMA', 'ALASKA', 'ARIZONA', 'ARKANSAS', 'CALIFORNIA', 'COLORADO',
    'CONNECTICUT', 'D.C.', 'DELAWARE', 'FLORIDA', 'GEORGIA', 'HAWAII',
    'IDAHO', 'ILLINOIS', 'INDIANA', 'IOWA', 'KANSAS', 'KENTUCKY',
    'LOUISIANA', 'MAINE', 'MARYLAND', 'MASSACHUSETTS', 'MICHIGAN',
    'MINNESOTA', 'MISSISSIPPI', 'MISSOURI', 'MONTANA', 'NEBRASKA', 'NEVADA',
    'NEW HAMPSHIRE', 'NEW JERSEY', 'NEW MEXICO', 'NEW YORK',
    'NORTH CAROLINA', 'NORTH DAKOTA', 'OHIO', 'OKLAHOMA', 'OREGON',
    'PENNSYLVANIA', 'RHODE ISLAND', 'SOUTH CAROLINA', 'SOUTH DAKOTA',
    'TENNESSEE', 'TEXAS', 'UTAH', 'VERMONT', 'VIRGINIA', 'WASHINGTON',
    'WEST VIRGINIA', 'WISCONSIN', 'WYOMING',
);

## fetch: 200 means new data, 304 means unchanged (we still run so that
## old entries get expired); anything else is fatal
{
    my $status = mirror($CNN_URL, $CNN_CACHE);
    die "status is $status, aborting\n"
        unless $status == 200 or $status == 304;
}

dbmopen(my %DB, $DB_MEMORY, 0644) or die "Cannot dbmopen $DB_MEMORY: $!";

# Three-argument open: the file names (one of them from the command line)
# are never interpreted as open modes or pipes.
my $tmp_output = "$OUTPUT~";
open STDIN,  '<', $CNN_CACHE  or die "Cannot open $CNN_CACHE: $!";
open STDOUT, '>', $tmp_output or die "Cannot create $OUTPUT~: $!";

# Install the parsed cache file as CGI.pm's default query object so the
# function-style param() below reads from it.
$CGI::Q = CGI->new(\*STDIN) or die "Cannot parse $CNN_CACHE\n";

%DB = () if $CLEAR;    # bye bye all that we know

my $now = time;        # one timestamp for the whole run

## first pass: add the new headlines
## (stops at the first missing or false headlineN parameter)
for (my $i = 1; my $headline = param("headline$i"); $i++) {
    my $state = param("state$i");
    $state = '' unless defined $state;    # missing stateN: file under ""

    my $key = "$state\n$headline";
    if (defined $DB{$key}) {
        # already known: refresh only the second (last-seen) timestamp
        $DB{$key} =~ s/\s\d+/ $now/;
    }
    else {
        # new entry: first-seen and last-seen start out equal
        $DB{$key} = "$now $now";
    }
}

## second pass: expire the old headlines
my $cutoff = $now - $EXPIRE * 60;
for my $key (keys %DB) {
    my ($last_seen) = $DB{$key} =~ /\s(\d+)/;
    delete $DB{$key} if defined $last_seen and $last_seen < $cutoff;
}

## final pass: generate the report
## each row is [ state, headline, first_seen ], ordered by state, then
## first_seen, then headline
my @data =
    sort {
           $a->[0] cmp $b->[0]
        or $a->[2] <=> $b->[2]
        or $a->[1] cmp $b->[1]
    }
    map {
        # LIMIT 2 keeps a headline that itself contains a newline in one
        # piece, so the timestamp always lands in slot 2.
        [ (split /\n/, $_, 2), (split /\s+/, $DB{$_})[0] ]
    }
    keys %DB;

dbmclose(%DB);    # everything we need is in @data now

my %states_seen;
my @table_guts = map {
    my ($state, $headline, $stamp) = @$_;

    # Only the first row of each state carries the named anchor that the
    # navigation bar links to.
    my $state_cell = $states_seen{$state}++
        ? escapeHTMLnobreak($state)
        : a({-name => fixname($state)}, escapeHTMLnobreak($state));

    # Scalar localtime looks like "Thu Sep  9 12:00:00 2000"; the greedy
    # match drops the last whitespace-separated field (the year).
    my ($when) = (localtime $stamp) =~ /(.*)\s/;

    Tr(
        td($state_cell),
        td(escapeHTMLnobreak($when)),
        td(escapeHTML($headline)),
    ) . "\n";
} @data;

# Navigation bar: a state name is a link only if it has rows in the table.
my $state_index = join " | ", map {
    $states_seen{$_}
        ? a({-href => fixname("#$_")}, escapeHTMLnobreak($_))
        : escapeHTMLnobreak($_);
} @STATES;

print start_html(
    -title => "CNN Headline News",
    -head  => meta({-http_equiv => 'refresh', -content => $REFRESH * 60}),
);
print table(
    {-border => 1, -cellspacing => 0, -cellpadding => 4},
    col({-width => "0*", -align => 'right'}),    # state
    col({-width => "0*"}),                       # date
    col({-width => "*"}),                        # item
    thead(Tr(th({-colspan => 3, -align => 'center'}, $state_index) . "\n")),
    tbody(@table_guts),
);
print end_html;

# Buffered write errors surface at close; never rename a truncated page
# over the good one.
close STDOUT or die "Cannot close $OUTPUT~: $!";
rename $tmp_output, $OUTPUT or die "Cannot rename $OUTPUT~ to $OUTPUT: $!";
exit 0;

# escapeHTMLnobreak(LIST)
# Joins its arguments with single spaces, HTML-escapes the result, and
# turns every space into &nbsp; so the text cannot wrap inside a table cell.
# Returns the escaped string.
sub escapeHTMLnobreak {
    my $html = escapeHTML("@_");
    $html =~ s/ /&nbsp;/g;
    return $html;
}

# fixname(STRING)
# Returns STRING with every run of characters other than ASCII letters and
# "#" collapsed into a single underscore; used to derive matching anchor
# names ("NEW_YORK") and fragment links ("#NEW_YORK").
sub fixname {
    my $name = shift;
    $name =~ tr/a-zA-Z\#/_/cs;
    return $name;
}