gecko-dev/js/js2/java/LineBuffer.java

334 строки
11 KiB
Java

/* -*- Mode: Java; tab-width: 8 -*-
* Copyright © 1998 Netscape Communications Corporation,
* All Rights Reserved.
*/
import java.io.Reader;
import java.io.IOException;
/**
* An input buffer that combines fast character-based access with
* (slower) support for retrieving the text of the current line. It
* also supports building strings directly out of the internal buffer
* to support fast scanning with minimal object creation.
*
* Note that it is customized in several ways to support the
* TokenStream class, and should not be considered general.
*
* Credits to Kipp Hickman and John Bandhauer.
*
* @author Mike McCabe
*/
final class LineBuffer {
/*
* for smooth operation of getLine(), this should be greater than
* the length of any expected line. Currently, 256 is 3% slower
* than 4096 for large compiles, but seems safer given evaluateString.
* Strings for the scanner are are built with StringBuffers
* instead of directly out of the buffer whenever a string crosses
* a buffer boundary, so small buffer sizes will mean that more
* objects are created.
*/
static final int BUFLEN = 256;
LineBuffer(Reader in, int lineno) {
this.in = in;
this.lineno = lineno;
}
int read() throws IOException {
if (end == offset && !fill())
return -1;
// Do only a bitmask + branch per character, at the cost of
// three branches per low-bits-only character.
if ((buffer[offset] & '\ufff0') == 0) {
if (buffer[offset] == '\r') {
// if the next character is a newline, skip past it.
if ((offset + 1) < end) {
if (buffer[offset + 1] == '\n')
offset++;
} else {
// set a flag for fill(), in case the first char of the
// next fill is a newline.
lastWasCR = true;
}
}
else if (buffer[offset] != '\n') {
return (int) buffer[offset++];
}
offset++;
prevStart = lineStart;
lineStart = offset;
lineno++;
return '\n';
}
return (int) buffer[offset++];
}
void unread() {
if (offset == 0)
// We can get here when we're asked to unread() an
// implicit EOF_CHAR.
// This would also be wrong behavior in the general case,
// because a peek() could map a buffer.length offset to 0
// in the process of a fill(), and leave it there. But
// the scanner never calls peek() or a failed match()
// followed by unread()... this would violate 1-character
// lookahead. So we're OK.
return;
offset--;
if ((buffer[offset] & '\ufff0') == 0
&& (buffer[offset] == '\r' || buffer[offset] == '\n')) {
// back off from the line start we presumably just registered...
lineStart = prevStart;
lineno--;
}
}
int peek() throws IOException {
if (end == offset && !fill())
return -1;
if (buffer[offset] == '\r')
return '\n';
return buffer[offset];
}
boolean match(char c) throws IOException {
if (end == offset && !fill())
return false;
// This'd be a place where we'd need to map '\r' to '\n' and
// do other updates, but TokenStream never looks ahead for
// '\n', so we don't bother.
if (buffer[offset] == c) {
offset++;
return true;
}
return false;
}
// Reconstruct a source line from the buffers. This can be slow...
String getLine() {
StringBuffer result = new StringBuffer();
int start = lineStart;
if (start >= offset) {
// the line begins somewhere in the other buffer; get that first.
if (otherStart < otherEnd)
// if a line ending was seen in the other buffer... otherwise
// just ignore this strange case.
result.append(otherBuffer, otherStart,
otherEnd - otherStart);
start = 0;
}
// get the part of the line in the current buffer.
result.append(buffer, start, offset - start);
// Get the remainder of the line.
int i = offset;
while(true) {
if (i == buffer.length) {
// we're out of buffer, let's just expand it. We do
// this instead of reading into a StringBuffer to
// preserve the stream for later reads.
char[] newBuffer = new char[buffer.length * 2];
System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
buffer = newBuffer;
int charsRead = 0;
try {
charsRead = in.read(buffer, end, buffer.length - end);
} catch (IOException ioe) {
// ignore it, we're already displaying an error...
}
if (charsRead < 0)
break;
end += charsRead;
}
if (buffer[i] == '\r' || buffer[i] == '\n')
break;
i++;
}
result.append(buffer, offset, i - offset);
return result.toString();
}
// Get the offset of the current character, relative to
// the line that getLine() returns.
int getOffset() {
if (lineStart >= offset)
// The line begins somewhere in the other buffer.
return offset + (otherEnd - otherStart);
else
return offset - lineStart;
}
// Set a mark to indicate that the reader should begin
// accumulating characters for getString(). The string begins
// with the last character read.
void startString() {
if (offset == 0) {
// We can get here if startString is called after a peek()
// or failed match() with offset past the end of the
// buffer.
// We're at the beginning of the buffer, and the previous character
// (which we want to include) is at the end of the last one, so
// we just go to StringBuffer mode.
stringSoFar = new StringBuffer();
stringSoFar.append(otherBuffer, otherEnd - 1, 1);
stringStart = -1; // Set sentinel value.
} else {
// Support restarting strings
stringSoFar = null;
stringStart = offset - 1;
}
}
// Get a string consisting of the characters seen since the last
// startString.
String getString() {
String result;
/*
* There's one strange case here: If the character offset currently
* points to (which we never want to include in the string) is
* a newline, then if the previous character is a carriage return,
* we probably want to exclude that as well. If the offset is 0,
* then we hope that fill() handled excluding it from stringSoFar.
*/
int loseCR = (offset > 0 &&
buffer[offset] == '\n' && buffer[offset - 1] == '\r') ?
1 : 0;
if (stringStart != -1) {
// String mark is valid, and in this buffer.
result = new String(buffer, stringStart,
offset - stringStart - loseCR);
} else {
// Exclude cr as well as nl of newline. If offset is 0, then
// hopefully fill() did the right thing.
result = (stringSoFar.append(buffer, 0, offset - loseCR)).toString();
}
stringStart = -1;
stringSoFar = null;
return result;
}
boolean fill() throws IOException {
// not sure I care...
if (end - offset != 0)
throw new IOException("fill of non-empty buffer");
// If there's a string currently being accumulated, save
// off the progress.
/*
* Exclude an end-of-buffer carriage return. NOTE this is not
* fully correct in the general case, because we really only
* want to exclude the carriage return if it's followed by a
* linefeed at the beginning of the next buffer. But we fudge
* because the scanner doesn't do this.
*/
int loseCR = (offset > 0 && lastWasCR) ? 1 : 0;
if (stringStart != -1) {
// The mark is in the current buffer, save off from the mark to the
// end.
stringSoFar = new StringBuffer();
stringSoFar.append(buffer, stringStart, end - stringStart - loseCR);
stringStart = -1;
} else if (stringSoFar != null) {
// the string began prior to the current buffer, so save the
// whole current buffer.
stringSoFar.append(buffer, 0, end - loseCR);
}
// swap buffers
char[] tempBuffer = buffer;
buffer = otherBuffer;
otherBuffer = tempBuffer;
// allocate the buffers lazily, in case we're handed a short string.
if (buffer == null) {
buffer = new char[BUFLEN];
}
// buffers have switched, so move the newline marker.
otherStart = lineStart;
otherEnd = end;
// set lineStart to a sentinel value, unless this is the first
// time around.
prevStart = lineStart = (otherBuffer == null) ? 0 : buffer.length + 1;
offset = 0;
end = in.read(buffer, 0, buffer.length);
if (end < 0) {
end = 0;
// can't null buffers here, because a string might be retrieved
// out of the other buffer, and a 0-length string might be
// retrieved out of this one.
hitEOF = true;
return false;
}
// If the last character of the previous fill was a carriage return,
// then ignore a newline.
// There's another bizzare special case here. If lastWasCR is
// true, and we see a newline, and the buffer length is
// 1... then we probably just read the last character of the
// file, and returning after advancing offset is not the right
// thing to do. Instead, we try to ignore the newline (and
// likely get to EOF for real) by doing yet another fill().
if (lastWasCR) {
if (buffer[0] == '\n') {
offset++;
if (end == 1)
return fill();
}
lineStart = offset;
lastWasCR = false;
}
return true;
}
int getLineno() { return lineno; }
boolean eof() { return hitEOF; }
private Reader in;
private char[] otherBuffer = null;
private char[] buffer = null;
// Yes, there are too too many of these.
private int offset = 0;
private int end = 0;
private int otherEnd;
private int lineno;
private int lineStart = 0;
private int otherStart = 0;
private int prevStart = 0;
private boolean lastWasCR = false;
private boolean hitEOF = false;
private int stringStart = -1;
private StringBuffer stringSoFar = null;
}