pjs/webtools/mozbot/BotModules/XMLLogger.bm

219 строки
7.7 KiB
Plaintext

# -*- Mode: perl; tab-width: 4; indent-tabs-mode: nil; -*-
################################
# XMLLogger Module #
################################
# Original Author: Matt Jones
# National Center for Ecological Analysis and Synthesis (NCEAS)
# University of California Santa Barbara
#
# This package creates an XML log file of the messages sent to IRC channels
# which mozbot has joined. The content that is logged can be selected using
# regular expression filters, although by default all messages are logged
package BotModules::XMLLogger;
use vars qw(@ISA);
@ISA = qw(BotModules);
1;
sub Help {
my $self = shift;
my ($event) = @_;
my $help = {
'' => 'This module keeps an XML log of channels.',
};
if ($self->isAdmin($event)) {
$help->{''} .= ' It can be configured to only accept messages matching certain patterns. The \'acceptedPatterns\' module variable is a list of regular expressions to use when determining what to log. The \'blockedPatterns\' list is the opposite.';
$help->{'rotatelogs'} = 'Creates a new log file for each channel and moves the old one to a date-stamped version, making sure that the XML is valid. Syntax: \'rotatelogs\'.';
}
return $help;
}
# RegisterConfig - Called when initialised, should call registerVariables
sub RegisterConfig {
my $self = shift;
$self->SUPER::RegisterConfig(@_);
$self->registerVariables(
# [ name, save?, settable? ]
['acceptedPatterns', 1, 1, ['']], # by default match everything
['blockedPatterns', 1, 1, []], # by default block nothing
);
}
sub Told {
my $self = shift;
my ($event, $message) = @_;
if ($self->isAdmin($event)) {
if ($message =~ /^\s*rotate\s*logs?\s*$/osi) {
$self->RotateLogs($event);
} else {
return $self->SUPER::Told(@_);
}
} else {
return $self->SUPER::Told(@_);
}
return 0; # we've dealt with it, no need to do anything else.
}
sub Log {
my $self = shift;
my ($event) = @_;
if (($event->{'firsttype'} eq 'Told') or
($event->{'firsttype'} eq 'Heard')) {
$self->DoLog($event, 'msg');
} elsif (($event->{'firsttype'} eq 'Felt') or
($event->{'firsttype'} eq 'Saw')) {
$self->DoLog($event, 'emote');
} elsif (($event->{'firsttype'} eq 'SpottedKick') or
($event->{'firsttype'} eq 'Kicked')) {
$self->DoLog($event, 'kick');
} elsif ($event->{'firsttype'} eq 'SpottedPart') {
$self->DoLog($event, 'part');
} elsif ($event->{'firsttype'} eq 'SpottedQuit') {
$self->DoLog($event, 'quit');
} elsif ($event->{'firsttype'} eq 'SpottedJoin') {
$self->DoLog($event, 'join');
} elsif ($event->{'firsttype'} eq 'SpottedNickChange') {
$self->DoLog($event, 'nick');
} elsif ($event->{'firsttype'} eq 'ModeChange') {
$self->DoLog($event, 'mode');
} elsif ($event->{'firsttype'} eq 'SpottedTopicChange') {
$self->DoLog($event, 'topic');
} # XXX should log notices
return $self->SUPER::Log(@_);
}
sub DoLog {
my $self = shift;
my ($event, $messageType) = @_;
if ($event->{'channel'} ne '') { # don't log private messages
foreach my $pattern (@{$self->{'acceptedPatterns'}}) {
my $regexp = $self->sanitizeRegexp($pattern);
if (($regexp eq '') ||
($event->{'fulldata'} =~ /$regexp/s) ||
($event->{'from'} =~ /$regexp/s)) {
# wohay, we have a candidate!
# now check for possible blockers...
unless ($self->isBlocked($event)) {
$self->WriteMessage($event->{'channel'},
$event->{'from'},
$event->{'fulldata'},
$messageType);
return; # only store each message once, regardless of how many patterns it matches
}
}
}
}
}
sub isBlocked {
my $self = shift;
my ($event) = @_;
foreach my $blockedPattern (@{$self->{'blockedPatterns'}}) {
my $regexp = $self->sanitizeRegexp($blockedPattern);
if ($event->{'data'} =~ /$regexp/s) {
return 1;
}
}
return 0;
}
sub WriteMessage {
my $self = shift;
my ($channel, $from, $message, $messageType) = @_;
# Open the log file and append the message
$channel = $self->sanitiseChannelName($channel);
my $logName = $self->getLogFilename("$channel.xml.part");
if (open(LOG, ">>$logName")) {
my $msgtime = $self->logdate(time()); # XXX use $event->{'time'}
# sanitise the output
$_ = $self->escapeXML($_) for ($messageType, $channel, $from, $msgtime, $message);
print LOG "<$messageType channel=\"$channel\" nick=\"$from\" time=\"$msgtime\">$message</$messageType>\n";
close(LOG);
} else {
$self->debug("Error logging, failed to open log $logName");
}
}
sub RotateLogs {
my $self = shift;
my ($event) = @_;
my $errors = 0;
foreach my $channel (@{$self->{'channels'}}) {
$self->debug("Rotating log for $channel...");
# XXX could (optionally) output message to channel saying so
$errors += $self->RotateLogFile($event, $channel);
}
$errors = $errors == 1 ? "$errors error" : "$errors errors";
$self->say($event, "Finished rotating logs, $errors.");
}
sub RotateLogFile {
my $self = shift;
my ($event, $channel) = @_;
# create new names
$channel = $self->sanitiseChannelName($channel);
my $time = $self->filedate(time()); # XXX should use $event->{'time'}
my $partName = $self->getLogFilename("$channel.xml.part");
my $finalName = $self->getLogFilename("$channel-$time.xml");
# try to finalise file
if (-e $finalName) {
$self->debug("error rotating log for $channel, destination already existed");
return 1; # report error
} elsif (not (-e $partName and -s $partName)) {
$self->debug("skipping $channel log rotation, log was empty");
return 0; # not an error condition
} elsif (open(FinalLog, ">$finalName")) {
# opened new file, add the XML and copy the data over
print FinalLog "<?xml version=\"1.0\"?>\n"; # XXX optional -- do we really want to add this?
print FinalLog "<irclog>\n";
open(PartLog, "<$partName"); # XXX error checking
while (defined($_ = <PartLog>)) {
print FinalLog;
}
close(PartLog);
print FinalLog "</irclog>";
close(FinalLog);
unlink($partName); # delete the part log, ready for new data
} else {
$self->debug("error rotating log for $channel, failed to open $finalName");
return 1; # doh, report error
}
return 0
}
# logdate: return nice looking date and time stamp
sub logdate {
my $self = shift;
my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(shift or time);
return sprintf("%d-%02d-%02dT%02d:%02d:%02dZ", $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}
# return a date and time stamp suitable for file names
sub filedate {
my $self = shift;
my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(shift or time);
return sprintf('%d%02d%02d-%02d%02d%02d', $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}
sub sanitiseChannelName {
my $self = shift;
my($channel) = @_;
$channel =~ s/([^\#&+a-zA-Z0-9-])//gosi; # sanitize
$channel =~ m/^(.*)$/os; # detaint
return $1;
}
# escape XML characters as needed
sub escapeXML {
my $self = shift;
my ($string) = @_;
$string =~ s/&/&amp;/gos;
$string =~ s/'/&apos;/gos;
$string =~ s/"/&quot;/gos;
$string =~ s/</&lt;/gos;
$string =~ s/>/&gt;/gos;
return $string;
}