[PATCH] suppress "Parsing of undecoded UTF-8 will give ..." warnings

[PATCH] suppress "Parsing of undecoded UTF-8 will give ..." warnings

am 01.06.2007 20:38:15 von miyagawa

Not sure if this works perfectly, but the test makes sure it doesn't
emit warnings on UTF-8 web pages, even with the parse_head option on.

=== lib/LWP/Protocol.pm
==================================================================
--- lib/LWP/Protocol.pm (revision 5525)
+++ lib/LWP/Protocol.pm (local)
@@ -103,7 +103,16 @@
my $parser;
if ($parse_head && $response->content_type eq 'text/html') {
require HTML::HeadParser;
- $parser = HTML::HeadParser->new($response->{'_headers'});
+ require HTTP::Headers::Util;
+ $parser = HTML::HeadParser->new($response->{'_headers'});
+
+ if (my @ct = HTTP::Headers::Util::split_header_words($response->header("Content-Type"))) {
+ my(undef, undef, %ct_param) = @{$ct[-1]};
+ if ($ct_param{charset} && $ct_param{charset} eq 'utf-8') {
+ $parser->utf8_mode(1);
+ }
+ }
+
}
my $content_size = 0;

=== t/live/utf8.t
==================================================================
--- t/live/utf8.t (revision 5525)
+++ t/live/utf8.t (local)
@@ -0,0 +1,18 @@
+use strict;
+use warnings;
+
+use Test::More tests => 2;
+use LWP::UserAgent;
+
+my $warnings;
+local $SIG{__WARN__} = sub { $warnings .= "@_" };
+
+my $url = "http://ja.wikipedia.org/wiki/%E3%83%A1%E3%82%A4%E3%83%B3%E3%83%9A%E3%83%BC%E3%82%B8";
+my $ua = LWP::UserAgent->new;
+my $res = $ua->get($url);
+
+like $res->header('Content-Type'), qr/charset=utf-8/;
+ok !$warnings, "No warnings";
+
+
+


--
Tatsuhiko Miyagawa