#!/usr/bin/perl -w $|++; use DBI; use URI; use Memoize; memoize('string_to_location'); ## CONFIG ## my $DSN = 'dbi:mysql:httpd_logs'; my $DB_AUTH = 'username:passwd; my $OUTPUT = "/home/merlyn/Html/sitemap.gif"; my $OUTPUT_TMP = "$OUTPUT~"; my $DAY = 7; ## END CONFIG ## ## database phase my $dbh = DBI->connect($DSN, (split ':', $DB_AUTH), { RaiseError => 1 }); $dbh->do("SET OPTION SQL_BIG_TABLES = 1"); my $sth = $dbh->prepare(qq( select Referer, Url from requests where When > date_sub(now(), interval $DAY day) and ( Url not like '/%/%' or Url like '/perltraining/%' or Url like '/merlyn/%' or Url like '/cgi/%' or Url like '/perl/%' or Url like '/icons/%' or Url like '/books/%' ) and Url not like '%.jpg' and Url not like '%.gif' and Url not like '/perl/bigword%' and Host not like '%.stonehenge.%' and Vhost = 'web.stonehenge.com' and Referer is not null )); $sth->execute(); my %count; while (my ($referer, $url) = $sth->fetchrow_array) { $_ = string_to_location($_) for $referer, $url; ++$count{"$referer $url"}; } $dbh->disconnect(); ## end database phase ## set up output, yes must do these in this order open STDOUT, ">$OUTPUT_TMP" or die "Cannot create $OUTPUT_TMP: $!"; open STDOUT, "|/usr/local/bin/dot -Tgif" or die "Cannot fork: $!"; my $max = 0; $max < $_ and $max = $_ for values %count; print <<'END'; digraph d { ranksep = 0.5; nodesep = 0.1; node [ style=invis, width=0.1, height=0.5, fontname="helvetica", fontsize=12, ]; edge [ // arrowsize=0.5, fontname="helvetica",fontsize=10, ]; // mention these so they usually end up near the top "http://something"; // "/merlyn"; END for (keys %count) { my $count = $count{$_}; my $ratio = $count / $max; next if $ratio < 0.01; my $heat = $ratio ** 0.5; my $weight = sprintf "%.2f", $heat * 99 + 1; my $color = sprintf q{"%.2f,%.2f,%.2f"}, $heat*0.30+0.70, 0.95, 1; my ($src, $dst) = split; print qq{ "$src" -> "$dst" [weight=$weight, color=$color];\n}; } print <<'END'; } END close STDOUT or die "Something wrong with waiting: $!"; rename $OUTPUT_TMP, $OUTPUT or die "Cannot rename: $!"; ## end of program, start of subroutines sub string_to_location { my $uri = URI->new_abs(shift, "http://www.stonehenge.com/")->canonical; { if ($uri->scheme eq 'http') { return "unknown" unless defined $uri->host; if ($uri->host =~ /^(w3|www|web)\.stonehenge\.com$/i) { if ($uri->path_query =~ /^\/cgi\/go\/(.*)/s) { ## outbound link $uri = URI->new_abs("$1", "http://www.stonehenge.com"); redo; } if ($uri->path =~ /^(.*\/)index\.html/s) { $uri->path("$1"); } for ($uri->path) { return "/wt-column" if m{^/merlyn/WebTechniques/col\d\d}; return "/wt" if m{^/merlyn/WebTechniques}; return "/ur-column" if m{^/merlyn/UnixReview/col\d\d}; return "/ur" if m{^/merlyn/UnixReview}; return "/lm-column" if m{^/merlyn/LinuxMag/col\d\d}; return "/lm" if m{^/merlyn/LinuxMag}; return "/pt-page" if m{^/perltraining/.*html}; return "/pt" if m{^/perltraining}; return "/pictures" if m{^/merlyn/Pictures/}; return "/merlyn-other" if m{^/merlyn/.+}; return "/merlyn" if m{^/merlyn/}; return "/books" if m{^/books/}; return "/cgi/amazon" if m{^/cgi/amazon}; return "/cgi/wtsearch" if m{^/cgi/wtsearch}; return "/cgi" if m{^/(?:cgi|perl)/}; return "/" if $_ eq "/"; return "/(other)"; } } for ($uri->host) { return "http://geek-girl" if /geek-girl/; return "http://perl.com" if /perl\.com/; return "http://perl.org" if /(pm|perl)\.org/; } return "http://something"; } return "unknown"; } }