#!/home/merlyn/bin/perl -w $|++; use strict; use File::Basename; use File::Copy; use File::Find; use File::Path; use IO::File; my $TOP = "/home/merlyn/Html/merlyn/WebTechniques"; my $TOP_URL = "http://www.stonehenge.com/merlyn/WebTechniques"; my $OUT = "/home/merlyn/Web/Reltree/Out"; sub WITHIN { $_[0] =~ m{ ^\Qhttp://www.stonehenge.com/merlyn/WebTechniques/\E }xs; } ## "use MyParser;" ## BEGIN { package MyParser; use HTML::Parser; use HTML::Entities (); use URI::URL; use vars qw(@ISA); @ISA = qw(HTML::Parser); sub new { my $pack = shift; my $self = $pack->SUPER::new; @{$self}{qw(__base __out __within)} = @_; $self; } sub declaration { my $self = shift; my ($decl) = @_; $self->{__out}->print(""); } sub start { my $self = shift; my ($tag, $attr, $attrseq, $origtext) = @_; my $out = $self->{__out}; $out->print("<$tag"); for (keys %$attr) { $out->print(" $_=\""); my $val = $attr->{$_}; if ("$tag $_" =~ /^(a href|img src)$/) { $val = url($val)->abs($self->{__base},1); if ($self->{__within}->($val)) { $val = $val->rel($self->{__base}); } } $out->print(HTML::Entities::encode($val, '<>&"')); $out->print('"'); } $out->print(">"); } sub end { my $self = shift; my ($tag) = @_; $self->{__out}->print(""); } sub text { my $self = shift; my ($text) = @_; $self->{__out}->print("$text"); } sub comment { my $self = shift; my ($comment) = @_; $self->{__out}->print(""); } } ## end "use MyParser;" ## find(\&translate, "$TOP/./"); sub translate { return unless -f; (my $rel_name = $File::Find::name) =~ s{ .*/\./ }{}xs; my $src_url = "$TOP_URL/$rel_name"; my $out_file = "$OUT/$rel_name"; mkpath dirname($out_file), 1; print "... $out_file\n"; if (/\.html$/) { my $out = IO::File->new(">$out_file") or die "Cannot create $out_file: $!"; MyParser->new($src_url,$out,\&WITHIN)->parse_file($_); } else { copy $_, $out_file; } }