#!/usr/bonsaitools/bin/perl -w
# -*- Mode: perl; indent-tabs-mode: nil -*-
#
# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is NewsBot
#
# The Initial Developer of the Original Code is Netscape Communications
# Corporation. Portions created by Netscape are
# Copyright (C) 1998 Netscape Communications Corporation. All
# Rights Reserved.
#
# Contributor(s): Dawn Endico

# Harvest pointers to news articles and their summaries from mailbox file
# and write html and rdf files from it.
#
# usage: newsbot [mailfile1, mailfile2...] [rdffile]
# each mail file is standard mbox format such as a sendmail spool file.
# rdffile is where to put the generated rdf.
# output is written to standard output.
#
# NOTE(review): the copy of this file that was reviewed had been run through
# an HTML tag stripper and had its line breaks collapsed.  The program logic
# was recoverable, but the HTML/RDF markup inside string literals was not.
# Markup below is reconstructed from what the stripping left behind; every
# place where that involved guessing is marked NOTE(review).  Compare against
# version control before deploying.

require 5.006;    # lexical filehandles and three-argument open
use strict;

use Mail::Folder::Mbox;
use Mail::Address;

# Base URL of the Deja archive (taken from the RDF link that survived intact).
my $DEJA_BASE = 'http://www.deja.com/[LB=http://www.mozilla.org/]';

# Number of newest articles written to the RDF channel.
my $RDF_MAX_ITEMS = 15;

# The LAST command line argument is always taken as the rdf file name.
# NOTE(review): consequently `newsbot somemailbox` treats the mailbox as the
# rdf output file and overwrites it; callers must always pass an rdf file.
my $rdffile = pop(@ARGV);    #name of file to write rdf data

unless (@ARGV) {
    # command line argument should be list of mail files to
    # process. If none given, use this file.
    my $mailfile = "/var/mail/newsbot";
    die("No mail\n") if (!-f $mailfile);
    push(@ARGV, $mailfile);
}

for my $file (@ARGV) {
    my $folder = Mail::Folder->new('AUTODETECT', $file);
    unless ($folder) {
        # Report the file name; $folder is undefined on this path.
        warn("can't open $file: $!");
        next;
    }

    printheader();

    my %seen;        # Message-IDs already collected from this folder
    my @articles;    # accepted articles, oldest first

    for my $msg (sort { $a <=> $b } $folder->message_list) {
        my $entity = $folder->get_mime_message($msg) or next;
        my $article = extract_article($entity);

        # Purge every parsed message, accepted or not, so rejected junk
        # does not leave its parsed body data behind.
        $entity->purge;

        next unless $article;

        # Only the first submission of a given article is kept.
        next if $seen{ $article->{'Message-ID'} }++;
        push(@articles, $article);
    }
    $folder->close;

    # Newest submissions are listed first.
    printarticle($_) for reverse @articles;
    printfooter();

    # The rdf file is rewritten for each mail file processed.
    printrdf(\@articles, $rdffile) if $rdffile;
}

# Escape the characters that are special in HTML/XML text and attribute
# values.  Returns the empty string for an undefined argument.
sub escape_html {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Read the whole body of one MIME part and return it as a string
# (empty string if the part cannot be opened).
sub slurp_part {
    my ($part) = @_;
    my $text = "";
    if (my $io = $part->open("r")) {
        while (defined(my $line = $io->getline)) {
            $text .= $line;
        }
        $io->close;
    }
    return $text;
}

# Turn one submitted mail message into an article record.
#
# Returns a hash reference with the keys Message-ID, Subject, Date, From,
# Newsgroups, Summary, submitter and submitdate, or nothing if the message
# is not a valid submission.
#
# This is important for weeding out junk.  Submissions must be multipart
# mime messages as created by Messenger when forwarding a news article.
# the first part is text/html or text/plain. The second part should be
# type message/rfc822. This format preserves the header information of
# the original news article (especially the Message-ID). It also makes
# it more difficult for random junk mailed to newsbot to litter the
# web page. We don't want someone to post an article, cc newsbot and
# have a big thread begin where the rest of the messages continue
# being cc'd to newsbot.
#
# 99-05-27: allow message/news in addition to message/rfc822 for the
# second part as this is what communicator 3.x uses. -endico
sub extract_article {
    my ($entity) = @_;

    my $submitter = $entity->get('From');
    $submitter = '' unless defined $submitter;
    chomp($submitter);
    $submitter = escape_html($submitter);

    my $submitdate = $entity->get('Date');
    $submitdate = '' unless defined $submitdate;
    chomp($submitdate);

    my @parts = $entity->parts;
    return if @parts < 2;

    my $summary_type = $parts[0]->head->mime_type;
    return
      unless $summary_type =~ m{text/html}
      || $summary_type =~ m{text/plain};

    my $article_type = $parts[1]->head->mime_type;
    return
      unless $article_type =~ m{message/rfc822}
      || $article_type =~ m{message/news};

    my $summary = slurp_part($parts[0]);
    if ($summary_type =~ m{text/plain}) {
        # line beginning with -- is a signature separator. Delete the sig
        $summary =~ s/^--.*//ms;
        $summary = escape_html($summary);

        # Make URLs and <user@host> addresses clickable.  The text is
        # already escaped, so address brackets appear as &lt; and &gt;.
        $summary =~ s{(http://\S+)}{<A HREF="$1">$1</A>}mg;
        $summary =~ s{(ftp://\S+)}{<A HREF="$1">$1</A>}mg;
        $summary =~
          s{&lt;(\S+\@\S+)&gt;}{&lt;<A HREF="mailto:$1">$1</A>&gt;}mg;
    }

    my $news = slurp_part($parts[1]);

    # Each header is captured in list context so that a failed match
    # yields undef instead of a stale $1 left over from an earlier match.

    # check to make sure this is a news article. If not, skip it.
    my ($newsgroups) = $news =~ /^Newsgroups: ([^\n]+)/m;
    return unless $newsgroups;

    # The Message-ID is the de-duplication key and the target of every
    # generated link, so an article without one cannot be listed.
    my ($mid) = $news =~ /^Message-ID: <([^>]+)/m;
    return unless defined $mid;

    my ($from)    = $news =~ /^From: ([^\n]+)/m;
    my ($subject) = $news =~ /^Subject: ([^\n]+)/m;
    my ($date)    = $news =~ /^Date: ([^\n]+)/m;

    $subject = '' unless defined $subject;
    $subject =~ s/^Re://ig;
    $subject =~ s/^Fwd://ig;
    $subject = escape_html($subject);

    # NOTE(review): the original link target for newsgroup names was
    # stripped from the reviewed copy; a server-independent news: URL is
    # used here.
    $newsgroups = escape_html($newsgroups);
    $newsgroups =~
      s{(netscape\.public\.mozilla\.[\w.-]+)}{\n<A HREF="news:$1">\n $1</A>}g;

    return {
        'Message-ID' => $mid,
        'Subject'    => $subject,
        'Date'       => escape_html($date),
        'From'       => defined $from ? $from : '',
        'Newsgroups' => $newsgroups,
        'Summary'    => $summary,
        'submitter'  => $submitter,
        'submitdate' => $submitdate,
    };
}

# Write the newest articles to $rdffile as a channel description.
#
#   $ref     - reference to the array of article records, oldest first
#              (undefined slots are ignored)
#   $rdffile - path of the file to (over)write
#
# Dies if the file cannot be opened or completely written.  Output goes to
# a private filehandle, so the caller's standard output is left untouched.
#
# NOTE(review): element names are reconstructed from the order of the
# surviving text (title/link/description, then title/url/link), which matches
# the RSS 0.9 channel format; confirm against the original.
sub printrdf {
    my ($ref, $rdffile) = @_;

    # only print newest 15 articles
    my @newest = reverse grep { defined } @{$ref};
    $#newest = $RDF_MAX_ITEMS - 1 if @newest > $RDF_MAX_ITEMS;

    open(my $rdf, '>', $rdffile)
      or die "Couldn\'t open rdf file:\"$rdffile\"\n";

    print $rdf <<'RDFHEAD';
<?xml version="1.0"?>
<rdf:RDF
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns="http://my.netscape.com/rdf/simple/0.9/">

 <channel>
  <title>Mozilla NewsBot</title>
  <link>http://www.mozilla.org/newsbot/</link>
  <description>Pointers to the hottest mozilla newsgroup threads.</description>
 </channel>

 <image>
  <title>Mozilla</title>
  <url>http://www.mozilla.org/images/hack.gif</url>
  <link>http://www.mozilla.org/newsbot/</link>
 </image>

RDFHEAD

    for my $article (@newest) {
        my $mid = escape_html($article->{'Message-ID'});
        print $rdf " <item>\n";
        print $rdf "  <title>" . $article->{'Subject'} . "</title>\n";
        print $rdf "  <link>$DEJA_BASE/msgid.xp?MID=&lt;" . $mid
          . "&gt;</link>\n";
        print $rdf " </item>\n\n";
    }
    print $rdf "</rdf:RDF>\n";

    close($rdf)
      or die "Couldn\'t write rdf file:\"$rdffile\": $!\n";
}    #end printrdf()

# Print the HTML for one article record (a hash reference as built by
# extract_article) to the currently selected output handle.
#
# NOTE(review): the tags below are reconstructed; only the text, the anchor
# positions and the "%3e%231/1" suffix of the thread URL survived in the
# reviewed copy.  The thread URL prefix in particular is a best guess.
sub printarticle {
    my ($artref) = @_;
    my %article = %{$artref};
    my $mid = escape_html($article{'Message-ID'});

    print "\n<HR>\n";
    print "<A NAME=\"" . $mid . "\">\n";
    print "</A>\n";
    print "<H3>\n";
    print $article{'Subject'} . "\n";
    print "</H3>\n";
    print "<P>\n";
    print $article{'Summary'};
    print "<P>\n";
    print "\nPosted: " . $article{'Date'} . "\n";
    print "<BR>";
    print $article{'Newsgroups'} . "\n";
    print "<P>\n";
    print "<A HREF=\"$DEJA_BASE/msgid.xp?MID=&lt;" . $mid . "&gt;\">\n";
    print "View Article</A> -\n";
    print "<A HREF=\"$DEJA_BASE/viewthread.xp?search=thread&amp;recnum=%3c"
      . $mid . "%3e%231/1\">\n";
    print "View Thread\n";
    print "</A>\n";
    print "<P>\n";
}

# Print the fixed top of the generated HTML page.
#
# NOTE(review): markup reconstructed; the target of the "channel" link was
# lost, so that word is emitted as plain text.
sub printheader {
    my $header = <<'ENDHEAD';
<HTML>
<HEAD>
<TITLE>newsbot</TITLE>
</HEAD>
<BODY>

<H1>newsbot</H1>

<P>
Since not everyone has a chance to keep up with all the mozilla news groups, newsbot is here to collect pointers to some of the more important announcements, discussions, and goings-on.

<P>
When you see an article of interest to the general mozilla community forward it to <A HREF="mailto:newsbot@mozilla.org">newsbot@mozilla.org</A> and write a summary of the article. Newsbot will add your summary to this page and make pointers back to the original article and its thread in DejaNews. For My Netcape users we also have a channel for newsbot.

<P>
For Netscape Communicator users, this means pressing the Forward button and writing a summary in the message window. (Forwarding as "quoted" or "inline" confuses newsbot. Be sure to forward as attachment.) For users of other clients, the forwarded message should be a multipart MIME message where the first part is text/plain or text/html and contains your summary, and the second part is type message/rfc822 and contains the news article.

ENDHEAD
    print $header;
}

# Print the fixed bottom of the generated HTML page.
#
# NOTE(review): markup reconstructed; the feedback address was lost, so the
# name is emitted as plain text.
sub printfooter {
    my $footer = <<'ENDFOOT';
<HR>
<P>
Send newsbot feedback to Dawn Endico.
</BODY>
</HTML>
ENDFOOT
    print $footer;
}