#!/usr/bin/perl
#
# Scrape the O'Reilly "animals" page and print an XML summary that groups
# book titles by the animal shown on their cover.
#
# The work is done by an XSH script (XML::XSH) passed to xsh() as a
# single-quoted heredoc, so Perl does not interpolate anything in it.
# The script:
#
#   1. Opens http://www.oreilly.com/animals.html as HTML (document id
#      "animals") with parser recovery switched on.
#   2. For column index 1 and then 2, selects every innermost table whose
#      first row has "Book Title" in that column, and walks all rows after
#      the header row.  For each row the last cell is taken as the cover
#      animal and the second-to-last cell as the book subject; the pair is
#      collected in the Perl hash %cover (animal => list of subjects).
#   3. Creates a new document "t1" with a <root> element and, for each
#      animal in sorted order, appends
#          <cover><animal>NAME</animal><book>TITLE</book>...</cover>
#      with the books sorted as well.
#   4. Lists the resulting document with "ls /".
#
# NOTE(review): $__ appears to be XSH's name for the current value of the
# enclosing Perl-expression foreach; confirm against the XML::XSH docs.
#
# The heredoc body and its END_XSH terminator must stay on their own
# lines, and every "#" comment inside the XSH script runs to end of line,
# so this file must not be reflowed onto a single line.

use strict;
use warnings;

use XML::XSH;

xsh <<'END_XSH';
recovering 1;   # for broken entity recovery (a frequent HTML problem)
quiet;          # avoid tracing of open
open HTML animals = "http://www.oreilly.com/animals.html";
foreach {1..2} {
  foreach //table[not(.//table) and contains(tr[1]/td[$__], "Book Title") ]/tr[position() > 1] {
    # pwd;
    $cover = string(td[last()]);
    $subject = string(td[last() - 1]);
    eval { push @{$cover{$cover}}, $subject; }
  }
}
create t1 root;
foreach {sort keys %cover} {
  ## print "animal $__";
  insert element cover into /root;
  cd /root/cover[last()];
  insert element animal into .;
  insert text $__ into animal;
  foreach {sort @{$cover{$__}}} {
    ## print "book $__";
    insert element book into .;
    insert text $__ into book[last()];
  }
}
quiet;  # avoid final message from ls
ls /;
END_XSH