pjs/webtools/mozbot/BotModules/WWW.bm

135 строки
4.9 KiB
Plaintext

################################
# WWW Module #
################################
package BotModules::WWW;
use vars qw(@ISA);
@ISA = qw(BotModules);
1;
# RegisterConfig - Called when initialised, should call registerVariables
sub RegisterConfig {
my $self = shift;
$self->SUPER::RegisterConfig(@_);
# $self->registerVariables(
# # [ name, save?, settable? ]
# ['x', 1, 1, 0],
# );
}
sub Help {
my $self = shift;
my ($event) = @_;
return {
'' => 'The WWW module provides a web interface.',
'wwwsize' => 'Reports on the size of a webpage. Syntax: \'wwwsize http://...\'',
'wwwlint' => 'Reports on whether the webpage contains any obvious (I mean _really_ obvious) no-nos like <layer> or document.all. Syntax: \'wwwlint http://...\'',
'wwwdoctype' => 'Reports on the doctype of a webpage. (Warning: Does not check that the doctype is not commented out!) Syntax: \'wwwdoctype http://...\'',
'wwwtitle' => 'Tries to heuristically determine a web page\'s title. Syntax: \'wwwtitle http://...\'',
};
}
sub Told {
my $self = shift;
my ($event, $message) = @_;
if ($message =~ /^\s*wwwsize\s+(.+?)\s*$/osi) {
$self->Fetch($event, $1, 'size');
} elsif ($message =~ /^\s*wwwlint\s+(.+?)\s*$/osi) {
$self->Fetch($event, $1, 'lint');
} elsif ($message =~ /^\s*wwwdoctype\s+(.+?)\s*$/osi) {
$self->Fetch($event, $1, 'doctype');
} elsif ($message =~ /^\s*wwwtitle\s+(.+?)\s*$/osi) {
$self->Fetch($event, $1, 'title');
} else {
return $self->SUPER::Told(@_);
}
return 0; # dealt with it...
}
sub Fetch {
my $self = shift;
my ($event, $uri, $type) = @_;
$self->getURI($event, $uri, $type);
}
sub GotURI {
my $self = shift;
my ($event, $uri, $output, $type) = @_;
my $chars = length($output);
if ($type eq 'size') {
if ($chars) {
$self->say($event, "$uri is $chars bytes long.");
} else {
$self->say($event, "$uri is either empty, or I could not download it.");
}
} elsif ($type eq 'lint') {
# ignore whether things are commented out or not.
unless ($chars) {
$self->say($event, "$uri is either empty, or I could not download it.");
} else {
my @status;
if ($output =~ /document\.all/os) {
push(@status, 'document.all');
}
if ($output =~ /document\.layers/os) {
push(@status, 'document.layers');
}
if ($output =~ /<i?layer/osi) {
push(@status, 'the <layer> tag');
}
if (@status) {
my $status = shift(@status);
if (@status) {
while (scalar(@status) > 1) {
$status .= ', '.shift(@status);
}
$status .= ' and '.shift(@status);
}
$self->say($event, "$uri contains $status.");
} else {
$self->say($event, "$uri doesn't have any _obvious_ flaws..."); # XXX doesn't work! try php.net
}
}
} elsif ($type eq 'doctype') {
# assume doctype is not commented.
unless ($chars) {
$self->say($event, "$uri is either empty, or I could not download it.");
} elsif ($output =~ /(<!DOCTYPE\s[^>]*>)/osi) {
my $doctype = $1;
$doctype =~ s/[\n\r]+/ /gosi;
# -- #mozilla was here --
# <Hixie> it would break 99% of the web if we didn't do it that way.
# <Hixie> including most of my test cases ;-)
# <dbaron> test cases don't matter...
# <dbaron> you'll fix them if we decide they're wrong
# <dbaron> but the web is a problem
if (length($doctype) > 220) { # arbitrary length greater than two 80 character lines
$self->say($event, "$uri has a very long and possibly corrupted doctype (maybe it has an internal subset).");
} else {
$self->say($event, "$uri has the following doctype: $doctype");
}
} else {
$self->say($event, "$uri has no specified doctype.");
}
} elsif ($type eq 'title') {
# assume doctype is not commented.
unless ($chars) {
$self->say($event, "$uri is either empty, or I could not download it.");
} elsif ($output =~ /<title\s*>(.*?)<\/title\s*>/osi or
$output =~ /<h1\s*>(.*?)<\/h1\s*>/osi) {
my $title = $1;
$title =~ s/\s+/ /gosi;
if (length($title) > 100) { # arbitrary length
$title = substr($title, 0, 100) . '...';
}
$self->say($event, "$uri has the following title: $title");
} else {
$self->say($event, "$uri has no specified title.");
}
} else {
return $self->SUPER::GotURI(@_);
}
}