pjs/webtools/tinderbox/scrape.pl

#! /usr/bin/perl
# -*- Mode: perl; indent-tabs-mode: nil -*-
#
# The contents of this file are subject to the Mozilla Public License
# Version 1.1 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
# License for the specific language governing rights and limitations
# under the License.
#
# The Original Code is Tinderbox
#
# The Initial Developer of the Original Code is Netscape Communications
# Corporation. Portions created by Netscape are Copyright (C) 1999
# Netscape Communications Corporation. All Rights Reserved.
#
# Contributor(s): Chris McAfee <mcafee@netscape.com>

#
# scrape.pl - Process log-scraped data into scrape.dat
#   Scan a gzipped build log for "TinderboxPrint:" lines and append them
#   to $tree/scrape.dat as a single line in the following format,
#
#  <logfilename>|blurb1|blurb2|blurb3 ...
#

use strict;

# Report the expected command-line syntax as a warning on STDERR.
sub usage {
  my $synopsis = "./scrape.pl <tree> <logfile>";
  # No trailing newline, so perl appends the script name and line number.
  warn $synopsis;
}

use Compress::Zlib;
use lib "@TINDERBOX_DIR@";
require "tbglobals.pl";
# Set to a true value to print progress/diagnostic messages on STDOUT.
my $debug = 0;

# Replace whatever PATH the caller supplied with a fixed value.
# NOTE(review): @SETUID_PATH@ looks like a placeholder that is substituted
# when the script is built/installed -- confirm.
$ENV{PATH} = "@SETUID_PATH@";

# Expect exactly two arguments: the tree name and the build log file name.
unless (@ARGV == 2) {
  &usage;
  die "Error: Wrong number of arguments\n";
}

my ($tree, $logfile) = @ARGV;

print "scrape.pl($tree, $logfile)\n" if ($debug);

# NOTE(review): trick_taint is loaded from tbglobals.pl; presumably it only
# untaints the value without validating it -- confirm that callers never pass
# names containing '/' or '..', since both values are used to build paths.
$tree = &trick_taint($tree);
$logfile = &trick_taint($logfile);

die "Error: No tree named $tree" unless -r "$::tree_dir/$tree/treedata.pl";
require "$::tree_dir/$tree/treedata.pl";

# Search the build log for the scrape data
#
my $logpath = "$::tree_dir/$tree/$logfile";
my $gz = gzopen($logpath, "rb")
  or die "gzopen($logpath): $!\n";
my @scrape_data = find_scrape_data($gz);
$gz->gzclose();

# An empty list means the log contained no TinderboxPrint: lines.  Test the
# array itself: defined(@array) was deprecated and is a compile-time error
# on perl 5.22 and later.
if (!@scrape_data) {
    print "No scrape data found in log.\n" if ($debug);
    exit(0);
}

# Save the scrape data to 'scrape.dat'
#
# The append happens while holding the lock on scrape.sem.  Any failure is
# remembered rather than thrown immediately so that the lock is always
# released before the script exits with an error.
my $lockfile = "$::tree_dir/$tree/scrape.sem";
my $datafile = "$::tree_dir/$tree/scrape.dat";
my $lock = &lock_datafile($lockfile);
my $error;
if (open(my $scrape_fh, ">>", $datafile)) {
    print {$scrape_fh} "$logfile|".join('|', @scrape_data)."\n"
      or $error = "Unable to write to $datafile: $!";
    # Buffered write errors may only surface when the handle is closed.
    close($scrape_fh)
      or $error ||= "Unable to close $datafile: $!";
} else {
    $error = "Unable to open $datafile: $!";
}
&unlock_datafile($lock);
# NOTE(review): removing the semaphore file after releasing the lock may race
# with another process that is about to lock it -- confirm against the
# lock_datafile implementation.
unlink($lockfile);
die "$error\n" if defined($error);

#print "scrape_data = ";
#my $i;
#foreach $i (@scrape_data) {
#  print "$i ";
#}
#print "\n";


# end of main
#============================================================

# Collect the payload of every "TinderboxPrint:" line in a build log.
#
# Argument:
#   $gz - handle returned by gzopen(); only its gzreadline() method is
#         called.  May be undef, in which case nothing is read.
#
# Returns the list of blurbs in the order they appear in the log.  Each
# blurb is the text following "TinderboxPrint:" with the trailing newline
# removed.
sub find_scrape_data {
  my ($gz) = @_;
  my @blurbs;

  if (defined($gz)) {
    my $gzline;
    # gzreadline() returns the number of bytes read; the loop ends on the
    # first result that is not positive.
    while ($gz->gzreadline($gzline) > 0) {
      # Line format:
      #  TinderboxPrint:<general html>
      next unless $gzline =~ m/TinderboxPrint:/;

      # Strip off everything up to and including the TinderboxPrint: marker.
      # The match is greedy, so if the marker occurs more than once only the
      # text after the last occurrence is kept.
      chomp($gzline);
      $gzline =~ s/.*TinderboxPrint://;

      # The blurb is kept whole; ';' is no longer used to split it up.
      push(@blurbs, $gzline);
    }
  }

  return @blurbs;
}
Change /usr/bonsaitools/bin/perl to /usr/bin/perl [p=reed r=justdave] 2006-12-07 07:59:39 +03:00			`#! /usr/bin/perl`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00			`# -- Mode: perl; indent-tabs-mode: nil --`
			`#`
			`# The contents of this file are subject to the Mozilla Public License`
			`# Version 1.1 (the "License"); you may not use this file except in`
			`# compliance with the License. You may obtain a copy of the License at`
			`# http://www.mozilla.org/MPL/`
			`#`
			`# Software distributed under the License is distributed on an "AS IS"`
			`# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the`
			`# License for the specific language governing rights and limitations`
			`# under the License.`
			`#`
			`# The Original Code is Tinderbox`
			`#`
			`# The Initial Developer of the Original Code is Netscape Communications`
			`# Corporation. Portions created by Netscape are Copyright (C) 1999`
			`# Netscape Communications Corporation. All Rights Reserved.`
			`#`
			`# Contributor(s): Chris McAfee <mcafee@netscape.com>`

			`#`
			`# scrape.pl - Process log-scraped data into scrape.dat`
			`# Write data to $tree/scrape.dat in the following format,`
			`#`
			`# <logfilename>\|blurb1\|blurb2\|blurb3 ...`
			`#`

Add 'use strict;' to expose globals. Remove uses of global $tree & $form. Remove tabs from showbuilds.pl Bug #359451 r=bear 2007-01-23 20:49:10 +03:00			`use strict;`

General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00			`sub usage {`
			`warn "./scrape.pl <tree> <logfile>";`
			`}`

More fixes for setuid tinderbox mail: * Use Compress::Zlib instead of gunzip in warning & scrape scripts * Exit with error if warning or scrape scripts fail. * Do not make warning.pl or scrape.pl setuid Bug #344695 r=bear 2006-08-17 02:07:17 +04:00			`use Compress::Zlib;`
Use setuid scripts to handle mail. Bug #344695 r=bear 2006-07-17 21:14:35 +04:00			`use lib "@TINDERBOX_DIR@";`
More fixes for setuid tinderbox mail: * Use Compress::Zlib instead of gunzip in warning & scrape scripts * Exit with error if warning or scrape scripts fail. * Do not make warning.pl or scrape.pl setuid Bug #344695 r=bear 2006-08-17 02:07:17 +04:00			`require "tbglobals.pl";`
Process build mail via cron. Bug #354462 r=bear,justdave 2006-10-19 22:13:12 +04:00			`my $debug = 0;`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00
Use setuid scripts to handle mail. Bug #344695 r=bear 2006-07-17 21:14:35 +04:00			`$ENV{PATH} = "@SETUID_PATH@";`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00
			`unless ($#ARGV == 1) {`
			`&usage;`
			`die "Error: Wrong number of arguments\n";`
			`}`

Add 'use strict;' to expose globals. Remove uses of global $tree & $form. Remove tabs from showbuilds.pl Bug #359451 r=bear 2007-01-23 20:49:10 +03:00			`my ($tree, $logfile) = @ARGV;`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00
Process build mail via cron. Bug #354462 r=bear,justdave 2006-10-19 22:13:12 +04:00			`print "scrape.pl($tree, $logfile)\n" if ($debug);`

More fixes for setuid tinderbox mail: * Use Compress::Zlib instead of gunzip in warning & scrape scripts * Exit with error if warning or scrape scripts fail. * Do not make warning.pl or scrape.pl setuid Bug #344695 r=bear 2006-08-17 02:07:17 +04:00			`$tree = &trick_taint($tree);`
			`$logfile = &trick_taint($logfile);`

Use $::tree_dir to allow tree data to be decoupled from tinderbox server directory. Bug #409052 r=bear 2007-12-19 23:00:54 +03:00			`die "Error: No tree named $tree" unless -r "$::tree_dir/$tree/treedata.pl";`
			`require "$::tree_dir/$tree/treedata.pl";`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00
Bug 106386 Correct misspellings in source code r=bc rs=brendan 2006-11-02 23:21:50 +03:00			`# Search the build log for the scrape data`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00			`#`
Use $::tree_dir to allow tree data to be decoupled from tinderbox server directory. Bug #409052 r=bear 2007-12-19 23:00:54 +03:00			`my $gz = gzopen("$::tree_dir/$tree/$logfile", "rb")`
			`or die "gzopen($::tree_dir/$tree/$logfile): $!\n";`
Add 'use strict;' to expose globals. Remove uses of global $tree & $form. Remove tabs from showbuilds.pl Bug #359451 r=bear 2007-01-23 20:49:10 +03:00			`my @scrape_data = find_scrape_data($gz);`
More fixes for setuid tinderbox mail: * Use Compress::Zlib instead of gunzip in warning & scrape scripts * Exit with error if warning or scrape scripts fail. * Do not make warning.pl or scrape.pl setuid Bug #344695 r=bear 2006-08-17 02:07:17 +04:00			`$gz->gzclose();`

			`if (!defined(@scrape_data)) {`
Only print 'No data' message when debugging. 2006-10-19 22:56:38 +04:00			`print "No scrape data found in log.\n" if ($debug);`
More fixes for setuid tinderbox mail: * Use Compress::Zlib instead of gunzip in warning & scrape scripts * Exit with error if warning or scrape scripts fail. * Do not make warning.pl or scrape.pl setuid Bug #344695 r=bear 2006-08-17 02:07:17 +04:00			`exit(0);`
			`}`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00
			`# Save the scrape data to 'scrape.dat'`
			`#`
Use $::tree_dir to allow tree data to be decoupled from tinderbox server directory. Bug #409052 r=bear 2007-12-19 23:00:54 +03:00			`my $lockfile = "$::tree_dir/$tree/scrape.sem";`
Process build mail via cron. Bug #354462 r=bear,justdave 2006-10-19 22:13:12 +04:00			`my $lock = &lock_datafile($lockfile);`
Use $::tree_dir to allow tree data to be decoupled from tinderbox server directory. Bug #409052 r=bear 2007-12-19 23:00:54 +03:00			`open(SCRAPE, ">>", "$::tree_dir/$tree/scrape.dat") or die "Unable to open $::tree_dir/$tree/scrape.dat";`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00			`print SCRAPE "$logfile\|".join('\|', @scrape_data)."\n";`
			`close SCRAPE;`
Process build mail via cron. Bug #354462 r=bear,justdave 2006-10-19 22:13:12 +04:00			`&unlock_datafile($lock);`
			`unlink($lockfile);`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00
Tinderbox,aaa,bbb,ccc,ddd format. 2001-11-02 12:37:35 +03:00			`#print "scrape_data = ";`
			`#my $i;`
			`#foreach $i (@scrape_data) {`
			`# print "$i ";`
			`#}`
			`#print "\n";`

General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00
			`# end of main`
			`#============================================================`

			`sub find_scrape_data {`
More fixes for setuid tinderbox mail: * Use Compress::Zlib instead of gunzip in warning & scrape scripts * Exit with error if warning or scrape scripts fail. * Do not make warning.pl or scrape.pl setuid Bug #344695 r=bear 2006-08-17 02:07:17 +04:00			`my ($gz) = $_[0];`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00			`local $_;`
Tinderbox,aaa,bbb,ccc,ddd format. 2001-11-02 12:37:35 +03:00			`my @rv;`
			`my @line;`
More fixes for setuid tinderbox mail: * Use Compress::Zlib instead of gunzip in warning & scrape scripts * Exit with error if warning or scrape scripts fail. * Do not make warning.pl or scrape.pl setuid Bug #344695 r=bear 2006-08-17 02:07:17 +04:00			`my ($bytesread, $gzline);`
			`while (defined($gz) && (($bytesread = $gz->gzreadline($gzline)) > 0)) {`
			`if ($gzline =~ m/TinderboxPrint:/) {`
			`# Line format:`
			`# TinderboxPrint:<general html>`

			`# Strip off the TinderboxPrint: part of the line`
			`chomp($gzline);`
			`$gzline =~ s/.*TinderboxPrint://;`

			`# No longer use ; to create separate lines.`
			`#@line = split(';', $_);`

Process build mail via cron. Bug #354462 r=bear,justdave 2006-10-19 22:13:12 +04:00			`push(@rv, $gzline);`
More fixes for setuid tinderbox mail: * Use Compress::Zlib instead of gunzip in warning & scrape scripts * Exit with error if warning or scrape scripts fail. * Do not make warning.pl or scrape.pl setuid Bug #344695 r=bear 2006-08-17 02:07:17 +04:00			`}`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00			`}`
Tinderbox,aaa,bbb,ccc,ddd format. 2001-11-02 12:37:35 +03:00			`return @rv;`
General purpose version of bloat.pl 2001-11-02 08:22:00 +03:00			`}`