#!/home/merlyn/bin/perl -Tw use strict; $|++; BEGIN { $SIG{__DIE__} = sub { print "Status: 404 Not Found\nContent-type: text/plain\n\n"; print "Perl error was $_[0]\n"; exit 0; } } use LWP; use HTTP::Cookies; my $JARFILENAME = "/tmp/merlyn.cookie.jar"; my $ua = LWP::UserAgent->new; $ua->env_proxy; my $jar = HTTP::Cookies->new(File => $JARFILENAME, AutoSave => 1); my $url = $ENV{PATH_INFO}; die "missing PATH_INFO" unless defined $url; die "malformed PATH_INFO: $url" unless $url =~ s/^\///; my $query = $ENV{QUERY_STRING}; $url .= "?$query" if defined $query and length $query; ## it'd be nice if there were a nicer way to do this my $script_name = substr($ENV{SCRIPT_URI}, 0, -length $ENV{PATH_INFO}); my $req = HTTP::Request->new($ENV{REQUEST_METHOD} => $url); if ($ENV{CONTENT_LENGTH}) { my $buf; read(STDIN, $buf, $ENV{CONTENT_LENGTH}); $req->content($buf); } if ($ENV{CONTENT_TYPE}) { $req->content_type($ENV{CONTENT_TYPE}); } for (sort grep !/^HOST$/, map /^HTTP_(.*)/, keys %ENV) { $req->header($_, $ENV{"HTTP_$_"}); } $jar->add_cookie_header($req); my $response = $ua->simple_request($req); $jar->extract_cookies($response); $response->scan(sub { my ($h, $v) = @_; $response->remove_header($h) if $h =~ /^(X-Meta-|Content-Length$)/i or $v =~ /\n/; }); if ($response->is_redirect) { my $loc = $response->header("Location"); $response->header(Location => "$script_name/$loc"); } print $response->headers_as_string; print "\n"; if ($response->content_type eq "text/html") { eval join "", ; die if $@; my $p = MyFilter->new($url, $script_name); $p->parse($response->content); $p->eof; } else { print $response->content; } __END__ { # begin embedded package package MyFilter; require HTML::Parser; require HTML::Entities; require URI::URL; use vars qw(@ISA); @ISA = qw(HTML::Parser); my %linkElements = # from HTML::Element.pm ( body => 'background', base => 'href', a => 'href', img => [qw(src lowsrc usemap)], # lowsrc is a Netscape invention form => 'action', input => 'src', 'link' => 'href', # need quoting since link is a perl builtin frame => 'src', applet => 'codebase', area => 'href', ); my %tag_attr; for my $tag (keys %linkElements) { my $tagval = $linkElements{$tag}; for my $attr (ref $tagval ? @$tagval : $tagval) { $tag_attr{"$tag $attr"}++; } } sub new { my $pack = shift; my $self = $pack->SUPER::new(); $self->{Url} = shift; $self->{ScriptName} = shift; $self; } ## some items stolen from HTML::Filter sub output { print $_[1]; } sub declaration { $_[0]->output("") } sub comment { $_[0]->output("") } sub text { $_[0]->output($_[1]) } sub end { $_[0]->output("") } sub start { my $self = shift; my ($tag, $attr, $attrseq, $origtext) = @_; $self->output("<$tag"); for (keys %$attr) { $self->output(" $_=\""); my $val = $attr->{$_}; if ($tag_attr{"$tag $_"}) { # needs rewrite $val = URI::URL::url($val)->abs($self->{Url},1); # make absolute if ($val->scheme eq "http") { $val = $self->{ScriptName} . "/$val"; # force return to us } } $self->output(HTML::Entities::encode($val, '<>&"')); $self->output('"'); } $self->output(">"); } } # end embedded package