Sorry, Darryl and all others who tried my script - it was just the wrong version with a bug. Here's my latest beta version of 'blowjob.pl' (sorry for the name as well :) which should work now. It fetches all URLs listed in a text file, saves them to the local hard disk, and now creates all the necessary folders, so the local documents and images should end up with the right paths.

But there's a problem: it produces memory leaks (system heap) and I don't know why?! Sometimes it even crashes! Help is appreciated. (Does it have something to do with parameter passing to subroutines and the 'my' declarations? See the little sketch after the script for what I mean.)

'Blowjob' is free, modify it as you like and forget about the copyright notice... ;-)

The next version will support various command line options, but that will probably only make sense if somebody is able to fix the memory problem...

Greetings,
Erich.

-------------------------------------------------------------<snip
#!/usr/bin/perl

use LWP::UserAgent;
use HTTP::Request;
use File::Basename;
use URI::URL;
use HTML::LinkExtor;

=head1 NAME

B<Blowjob> - a simple http sucker

=head1 SYNOPSIS

I<Usage:> blowjob [<filename>]

<filename> is a file that contains a list of URLs separated by newlines.
Default <filename> is 'blowjob.job'.

=head1 DESCRIPTION

Reads a number of http documents from WWW servers into a local default
folder. The URLs to read from are specified in the input file given on
the command line.

=cut

### startup

$version = "0.37 beta";
&say( "Blowjob/$version (c) 1996 by Erich Rast\n\n" );

### filename, path & misc

$file = shift;
$file = 'blowjob.job' unless $file;         # default input file
die "Too many arguments.\n" if ( @ARGV );   # more than 1 argument?

$home = `pwd`;
chop( $home );

my $priority = 0777;                        # read/write/execute for everybody!

use Config;
if ( $Config{'osname'} =~ /^macos/i ) {
    $dir_sep = ':';
    $dir_up  = ':';
}
else {
    if ( $Config{'osname'} =~ /^msdos/i ) { $dir_sep = '\\' }
    else                                  { $dir_sep = '/'  }
    $dir_up = '..';
}

### create user agent

$ua = new LWP::UserAgent;
$ua->agent( "Blowjob/$version" );

### set up parser callback routine

sub callback {
    my ( $tag, %attr ) = @_;
    return if ( $tag ne 'img' and $tag ne 'a' );    # forget all but links and images
    if ( $tag eq 'img' ) { push( @imgs, values %attr ) }
    else                 { push( @links, values %attr ) }
}

### make parser

$p = HTML::LinkExtor->new( \&callback );

#################
### main loop ###
#################

# globals: $host, $job, $url, $res, $wd_count, $f_count, $d_count
$wd_count = $f_count = $d_count = 1;
local $workdir;

open( IN, $file ) || die "Cannot open jobfile '$file': $!\n";
binmode IN;

JOB: while ( $job = <IN> ) {
    next JOB if ( $job =~ /^#/o );    # skip comment lines
    chop( $job );
    $url = new URI::URL $job;
    if ( $workdir = $url->host ) {
        $workdir .= ( '#' . ++$wd_count ) if ( -e $workdir );
        mkdir( $workdir, $priority )
            || &say( "# Couldn't create '$workdir' with permission $priority: $!\n" );
        chdir( $workdir ) || &say( "# Couldn't chdir to '$workdir': $!\n" );
        &get_url( $url );
        chdir( $home )    || &say( "# Couldn't chdir to '$home': $!\n" );
    }
    else {
        &say( "# Not a valid URL: '$job'\n" );
    }
}
close IN;
&say( "Done.\n" );

### get_url( $url )
### get a url and all images and local documents

sub get_url {
    my ( $url ) = @_;
    my $res = &http_get( $url );
    return unless $res;
    my $base = $res->base;
    @imgs  = ();
    @links = ();
    $p->parse( $res->content );    # parse for img and link tags
    @imgs   = map { $_ = url( $_, $base )->abs; } @imgs;
    @locals = map { $_ = url( $_, $base )->abs; } &get_locals( @links );
    foreach $local (@locals) { &http_get( $local, $base ) }
    foreach $img   (@imgs)   { &http_get( $img,   $base ) }
}

### get_locals( @any_links ) ==> @local_abs_links
### return all local @links

sub get_locals {
    my @locals;
    foreach $link (@_) {
        unless ( $link =~ m|//|so ) { push @locals, $link }
    }
    # maybe map would be more elegant here
    return @locals;
}

### http_get( $url, $base ) ==> $response_object
### send http GET request and return the object received
### side effect: save received object to $url path without $base

sub http_get {
    my ( $url, $base ) = @_;
    my $req = new HTTP::Request GET => $url;
    my $res = $ua->request( $req );
    if ( $res->is_error ) {
        &say( "# Couldn't get <$url>: ", $res->code, " - ", $res->message, "\n" );
        return 0;
    }
    $req  = $res->request;
    $url  = $req->url;                # server may have returned a changed url (eg. added '/' etc.)
    $base = $res->base unless $base;
    &write_file( $res, $url, $base );
    return $res;
}

### write_file( $response_object, $url, $base )
### write file to local disk from an absolute url and the base
### side effect: create the OS specific path if it does not yet exist

sub write_file {
    my ( $res, $url, $base ) = @_;
    my $old_url = $url;
    $url =~ s/($base)(\S*)/$2/g;                       # get $url path without $base path
    $url =~ s|/|$dir_sep|g;                            # change separator from unix to our OS
    my ( $file, $path ) = fileparse( $url );           # split filename from path
    my @components = split( /\Q$dir_sep\E/, $path );   # split path into single dirs
    my ( $count, $i ) = ( 0, 0 );                      # just to be sure
    foreach $dir (@components) {                       # create each dir and make it current
        $dir .= ( '#' . ++$d_count ) if ( -e $dir );
        mkdir( $dir, $priority )
            || &say( "# Couldn't create '$dir' with permission $priority: $!\n" );
        if ( chdir $dir ) { ++$count }
        else              { &say( "# Couldn't chdir to '$dir': $!\n" ) }
    }
    $file = 'index.html' unless $file;                 # default
    $file .= ( '#' . ++$f_count ) if ( -e $file );
    open( OUT, ">" . $file ) || &say( "# Couldn't write to '$file': $!\n" );
    print OUT $res->content;
    close OUT;
    &say( "<$old_url> read as '$file'\n" );
    for ( $i = 0; $i < $count; $i++ ) {                # go back to the dir we came from
        chdir( $dir_up ) || &say( "# Couldn't chdir to '$dir_up': $!\n" );
    }
    # end of strange directory hacking
}

### say( @comments )
### display comments if necessary

sub say {
    foreach $comment (@_) { print $comment }
}

__END__
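
P.S. To make the 'my' question above a bit more concrete, here is a rough, untested sketch of what I have in mind: the parser callback and the @imgs/@links package globals replaced by lexical arrays that a closure fills and passes back as references. The name extract_links is just made up for the sketch, and I honestly don't know whether any of this has to do with the heap problem; it only shows the kind of parameter passing I mean.

use HTML::LinkExtor;

# Collect <img> and <a> URLs from one document into lexical arrays
# and return two array references instead of filling package globals.
sub extract_links {
    my ( $content ) = @_;
    my ( @imgs, @links );                # lexicals, captured by the closure below
    my $p = HTML::LinkExtor->new( sub {
        my ( $tag, %attr ) = @_;
        return if ( $tag ne 'img' and $tag ne 'a' );
        if ( $tag eq 'img' ) { push( @imgs, values %attr ) }
        else                 { push( @links, values %attr ) }
    } );
    $p->parse( $content );
    return ( \@imgs, \@links );          # arrays live only as long as these refs
}

# Possible use inside get_url():
#   my ( $imgs, $links ) = &extract_links( $res->content );
#   foreach $img (@$imgs) { &http_get( $img, $base ) }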