LWP::Parallel::RobotUA

LWP::Parallel::RobotUA

am 23.03.2006 07:38:47 von Peter Hill

Hi,
I'm trying to get web documents returned for analysis using the RobotUA part
of LWP::Parallel, but for some reason the callback function never completes;
specifically in the sample code below (output at end) the line
print "We never get here.\n";
is never executed, which is where I would expect to call my analysis code.
What dumb error am I committing?

#! /usr/bin/perl -w
use strict;
use LWP::Parallel::RobotUA qw(:CALLBACK);
# Upper bound, in bytes, on how much of one document callback_for_parse
# will keep accepting.
my $MAX_SIZE = 100000;

my $ua = LWP::Parallel::RobotUA->new('foobar/1.0', 'foo@bar.com');
# NOTE: LWP::RobotUA documents delay() in MINUTES, so 0.5 means 30 seconds
# between requests to the same host (use e.g. 0.5/60 for half a second).
$ua->delay(0.5);
$ua->in_order(1);      # handle requests in order of registration
$ua->duplicates(0);    # ignore duplicates
$ua->timeout(2);       # in seconds
$ua->redirect(1);      # follow redirects
$ua->max_hosts(5);
$ua->max_req(5);

# register initial request
addURL('http://www.cpan.org/');

# This is the main (implicit) loop. While it runs, the registered callback is
# invoked once per received chunk. wait() itself returns a reference to a
# hash of entry objects, one per request (see the LWP::Parallel::UserAgent
# synopsis), and that return value is where finished documents are available.
my $entries = $ua->wait(15);

# Per-document processing belongs here, after wait() has returned, rather
# than at the end of the per-chunk callback.
for my $entry (values %{ $entries || {} }) {
    my $response = $entry->response;
    # The body is whatever the callback stored via add_content().
    print "finished ", $response->request->url, ": Code ", $response->code,
        ", ", $response->message, ", ", length($response->content),
        " bytes in total\n";
    # ... call the analysis code on $response->content here ...
}

# Per-chunk callback passed to $ua->register().
#
# It is called every time a chunk of body data arrives for a request, so one
# document normally produces several calls. It is NOT an end-of-document
# hook: work that needs the complete document should be done on the
# responses available once $ua->wait() has returned.
#
# Arguments:
#   $content  - the chunk of data just received
#   $response - the response object this chunk belongs to; chunks are
#               accumulated on it with add_content()
#   $protocol - the protocol object serving the connection
#   $entry    - the user agent's entry object for this request
#
# Returns the chunk length (a positive number) to keep reading from this
# connection, or C_ENDCON to close this connection only.
sub callback_for_parse {
    my ($content, $response, $protocol, $entry) = @_;

    # Treat an undefined chunk like an empty one instead of warning under -w.
    my $chunk_size = defined $content ? length $content : 0;

    print "handling answer from ", $response->request->url, ": ",
        $chunk_size, " bytes, Code ", $response->code, ", ",
        $response->message, "\n";

    if ($chunk_size) {
        # Scalar assignment on purpose: in list context content_type may
        # return more than just the media type.
        my $type = $response->content_type;

        # Kept on one output line; the posted script had this literal broken
        # across two source lines, which embedded a newline after "type".
        print "... received chunk ", $chunk_size, " bytes, type ", $type, "\n";
        $response->add_content($content);

        if (length($response->content) < $MAX_SIZE and $type =~ m{text/html}i) {
            print "... returning ", $chunk_size, "\n";
            # print "content is :".$content."\n";
            print "response is :", $response, "\n";
            print "protocol is :", $protocol, "\n";
            print "entry is :", $entry, "\n";
            # A positive return value asks the user agent to keep reading.
            return $chunk_size;
        }

        print "oversize or not text/html: content-type is ", $type, "\n";
    }

    # Reached only for an empty chunk or a document rejected above. A normal
    # text/html download below $MAX_SIZE returns early on every chunk, which
    # is why this line does not show up in an ordinary run.
    print "We never get here.\n";
    return C_ENDCON;
}

# Queue a GET request for $url on the shared $ua, with callback_for_parse as
# its per-chunk handler.
#
# Per the LWP::Parallel::UserAgent documentation, register() returns a false
# value on success and an error HTTP::Response when the request is refused,
# so the result is checked before success is reported.
#
# Returns 1 if the request was registered, nothing otherwise.
sub addURL {
    my $url = shift;

    my $request = HTTP::Request->new(GET => $url);
    if (my $error = $ua->register($request, \&callback_for_parse)) {
        warn "... could not register request for $url: ",
            $error->code, " ", $error->message, "\n";
        return;
    }

    print "... registered request for $url\n";
    return 1;
}

# output
... registered request for http://www.cpan.org/
handling answer from http://www.cpan.org/: 4138 bytes, Code 200, OK
... received chunk 4138 bytes, type text/html
... returning 4138
response is :HTTP::Response=HASH(0x155b87c)
protocol is :LWP::Parallel::Protocol::http=HASH(0x2951c18)
entry is :LWP::Parallel::UserAgent::Entry=HASH(0x28e8da8)
handling answer from http://www.cpan.org/: 1665 bytes, Code 200, OK
... received chunk 1665 bytes, type text/html
... returning 1665
response is :HTTP::Response=HASH(0x155b87c)
protocol is :LWP::Parallel::Protocol::http=HASH(0x2951c18)
entry is :LWP::Parallel::UserAgent::Entry=HASH(0x28e8da8)