1999-03-05 00:46:07 +03:00
|
|
|
# Backwards.pm
|
|
|
|
|
|
|
|
# Copyright (C) 1999 Uri Guttman. All rights reserved.
|
|
|
|
# mail bugs, comments and feedback to uri@sysarch.com
|
|
|
|
|
|
|
|
|
|
|
|
use strict ;
|
|
|
|
|
|
|
|
package Backwards ;
|
|
|
|
|
|
|
|
use Symbol ;
|
|
|
|
use Fcntl ;
|
|
|
|
use Carp ;
|
|
|
|
use integer ;
|
|
|
|
|
|
|
|
#my $max_read_size = 3 ;
|
|
|
|
|
|
|
|
# number of bytes fetched by each block read: 8192 (8KB)

my $max_read_size = 8 * 1024 ;

# tied handle support: tie() and <HANDLE> on a tied handle dispatch to
# TIEHANDLE and READLINE, so alias those names to the object methods

*READLINE = \&readline ;
*TIEHANDLE = \&new ;
|
|
|
|
|
|
|
|
# create a new Backwards object
|
|
|
|
|
|
|
|
# Constructor; it is also aliased as TIEHANDLE for the tied handle interface.
#
# Arguments:
#	$class		class name the object is blessed into
#	$filename	path of the file to be read backwards
#
# Returns the new object, positioned at the end of the file. If the file
# can't be opened or its end can't be located, a warning is issued with
# carp and nothing (undef in scalar context) is returned.

sub new {

	my( $class, $filename ) = @_ ;

	my( $handle, $seek_pos, $read_size, $self ) ;

# get a new handle symbol for the real file handle

	$handle = gensym() ;

# open the file for reading. the explicit '<' mode keeps special characters
# in the file name (leading '>', trailing '|', etc.) from being taken as an
# open mode or as a command to run

	unless( open( $handle, '<', $filename ) ) {

		carp "Can't open $filename $!" ;
		return ;
	}

# seek to the end of the file (whence 2 is SEEK_END). sysseek returns the
# new position, which is the file size. that value is used directly because
# tell() is not reliable on a handle that is positioned with sysseek

	$seek_pos = sysseek( $handle, 0, 2 ) ;

	unless( defined $seek_pos ) {

		carp "Can't seek to end of $filename $!" ;
		close( $handle ) ;
		return ;
	}

# sysseek reports position 0 as the string '0 but true' so force it to be
# a plain number

	$seek_pos += 0 ;

# get the size of the first block to read,
# either a trailing partial one (the % size) or full sized one (max read size)

	$read_size = $seek_pos % $max_read_size || $max_read_size ;

# create the hash for the object, bless and return it

	$self = {
		'file_name'	=> $filename,
		'handle'	=> $handle,
		'read_size'	=> $read_size,
		'seek_pos'	=> $seek_pos,
		'lines'		=> [],
	} ;

	return( bless( $self, $class ) ) ;
}
|
|
|
|
|
|
|
|
# Return the previous line of the file; it is also aliased as READLINE for
# the tied handle interface.
#
# Takes only the object. Returns the next line going backwards through the
# file, or undef once all the lines have been returned. Lines keep their
# trailing newline; only text at the very end of the file that has no
# newline is returned without one. If a seek or read on the file fails, a
# warning is issued with carp, nothing is returned and the object's state
# is left as it was before the call.

sub readline {

	my( $self ) = @_ ;

	my( $handle, $lines_ref, $seek_pos, $read_cnt, $read_buf,
	    $read_size, $partial, $text ) ;

# get the buffer of lines

	$lines_ref = $self->{'lines'} ;

	while( 1 ) {

# see if there is more than 1 line in the buffer. the first entry in the
# buffer may be only the tail of a line that starts in an earlier block, so
# it is never returned while there is still more of the file to read

		if ( @{$lines_ref} > 1 ) {

# we have a complete line so return it

			return( pop @{$lines_ref} ) ;
		}

# we don't have a complete line, so we have to read blocks until we do

		$seek_pos = $self->{'seek_pos'} ;

# see if we are at the beginning of the file

		if ( $seek_pos == 0 ) {

# the last read never made more lines, so return the last line in the buffer
# if no lines left then undef will be returned

			return( pop @{$lines_ref} ) ;
		}

# we have to read more text so get the handle and the current read size

		$handle = $self->{'handle'} ;
		$read_size = $self->{'read_size'} ;

# seek to the beginning of the block before the current position
# (whence 0 is SEEK_SET)

		$seek_pos -= $read_size ;

		unless( defined sysseek( $handle, $seek_pos, 0 ) ) {

			carp "Can't seek in $self->{'file_name'} $!" ;
			return ;
		}

# read in the next (previous) block of text

		$read_cnt = sysread( $handle, $read_buf, $read_size ) ;

		unless( defined $read_cnt ) {

			carp "Can't read $self->{'file_name'} $!" ;
			return ;
		}

# the block was read, so now save the new seek position.
# after the first read, always read the maximum size

		$self->{'seek_pos'} = $seek_pos ;
		$self->{'read_size'} = $max_read_size ;

# prepend the read buffer to the leftover (possibly partial) line. the
# leftover is tested with defined and not for truth so that a leftover of
# just '0' is not thrown away

		$partial = pop @{$lines_ref} ;
		$text = $read_buf ;
		$text .= $partial if defined $partial ;

# split the buffer into a list of lines
# this may want to be $/ but reading files backwards assumes plain text and
# newline separators

		@{$lines_ref} = $text =~ m[(^.*\n|^.+)]mg ;
	}
}
|
|
|
|
|
|
|
|
__END__
|
|
|
|
|
|
|
|
|
|
|
|
=head1 NAME
|
|
|
|
|
|
|
|
Backwards.pm -- Read a file backwards by lines.
|
|
|
|
|
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
|
|
|
|
use Backwards ;
|
|
|
|
|
|
|
|
# Object interface
|
|
|
|
|
|
|
|
$bw = Backwards->new( 'log_file' ) ;
|
|
|
|
|
|
|
|
while( $log_line = $bw->readline ) {
|
|
|
|
print $log_line ;
|
|
|
|
}
|
|
|
|
|
|
|
|
# Tied Handle Interface
|
|
|
|
|
|
|
|
tie *BW, 'Backwards', 'log_file' ;
|
|
|
|
|
|
|
|
while( <BW> ) {
|
|
|
|
print ;
|
|
|
|
}
|
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
|
|
|
|
|
|
|
|
This module reads a file backwards line by line. It is simple to use,
|
|
|
|
memory efficient and fast. It supports both an object and a tied handle
|
|
|
|
interface.
|
|
|
|
|
|
|
|
It is intended for processing log and other similar text files which
|
|
|
|
typically have new entries appended. It uses newline as the separator
|
|
|
|
and not $/ since it is only meant to be used for text.
|
|
|
|
|
|
|
|
It works by reading a large (8KB) block of text from the end of the file,
splitting it on newlines and buffering the resulting lines until the buffer
runs out. Then it seeks to the previous block and splits it. When it reaches
|
|
|
|
the beginning of the file, it stops reading more blocks. All boundary
|
|
|
|
conditions are handled correctly. If there is a trailing partial line
|
|
|
|
(no newline) it will be the first line returned. Lines larger than the
|
|
|
|
read buffer size are ok.
|
|
|
|
|
|
|
|
=head2 Object Interface
|
|
|
|
|
|
|
|
|
|
|
|
There are only 2 methods in Backwards' object interface, new and
|
|
|
|
readline.
|
|
|
|
|
|
|
|
=head2 new
|
|
|
|
|
|
|
|
New takes just a filename for an argument and it either returns the
|
|
|
|
object on a successful open on that file or undef.
|
|
|
|
|
|
|
|
=head2 readline
|
|
|
|
|
|
|
|
Readline takes no arguments and it returns the previous line in the file
|
|
|
|
or undef when there are no more lines in the file.
|
|
|
|
|
|
|
|
|
|
|
|
=head2 Tied Handle Interface
|
|
|
|
|
|
|
|
The only tied handle calls supported are TIEHANDLE and READLINE and they
|
|
|
|
are typeglobbed to new and readline respectively. All other tied handle
|
|
|
|
operations will generate an unknown method error. Do not seek, write or
|
|
|
|
do any other operation other than <> on the handle.
|