Bug 338252: sourceTextEncoding isn't always properly parsed (fixes some Sherlock plugins with non standard character encodings), r=mconnor

This commit is contained in:
gavin%gavinsharp.com 2006-05-29 18:48:42 +00:00
Родитель 979f59c5a5
Коммит 9c114f257f
1 изменённых файлов: 42 добавлений и 27 удалений

Просмотреть файл

@ -505,31 +505,19 @@ function bytesToString(aBytes, aCharset) {
/** /**
* Converts an array of bytes representing a Sherlock file into an array of * Converts an array of bytes representing a Sherlock file into an array of
* lines representing the useful data from the file. * lines representing the useful data from the file.
*
* @param aBytes
* The array of bytes representing the Sherlock file.
* @param aCharsetCode
* An integer value representing a character set code to be passed to
* fileCharsetFromCode, or null for the default Sherlock encoding.
*/ */
function sherlockBytesToData(aBytes) { function sherlockBytesToLines(aBytes, aCharsetCode) {
// Sherlock files can specify the file encoding they use in the file // fileCharsetFromCode returns the default encoding if aCharsetCode is null
// itself, using the sourceTextEncoding attribute. We read only ASCII var charset = fileCharsetFromCode(aCharsetCode);
// bytes here to see if we need to reinterpret the byte stream.
// XXX If convertFromByteArray provided a way to ignore or replace
// invalid byte sequences, we could use it here and avoid doing this
// ourselves.
var asciiBytes = aBytes.filter(function (n) {return !(0x80 & n);});
var asciiString = String.fromCharCode.apply(null, asciiBytes);
asciiString = asciiString.split(NEW_LINES).filter(isUsefulLine)
.join("\n");
// Look for the sourceTextEncoding attribute. It's value should be an
// integer that maps to one of the encodings in fileCharsetFromCode.
const sourceTextEncoding = /sourceTextEncoding\s*=['"](\d)['"]/i;
var sourceTE = sourceTextEncoding.exec(asciiString);
if (sourceTE && sourceTE.length > 1)
charset = fileCharsetFromCode(sourceTE[1]);
else
charset = fileCharsetFromCode(/* get the default */);
var dataString = bytesToString(aBytes, charset); var dataString = bytesToString(aBytes, charset);
ENSURE(dataString, "_onLoad: Couldn't convert byte array!", ENSURE(dataString, "sherlockBytesToLines: Couldn't convert byte array!",
Cr.NS_ERROR_FAILURE); Cr.NS_ERROR_FAILURE);
// Split the string into lines, and filter out comments and // Split the string into lines, and filter out comments and
@ -850,7 +838,7 @@ function Engine(aLocation, aSourceDataType, aIsReadOnly) {
Engine.prototype = { Engine.prototype = {
// The engine's alias. // The engine's alias.
_alias: null, _alias: null,
// The data describing the engine. Is either an array of lines, for Sherlock // The data describing the engine. Is either an array of bytes, for Sherlock
// files, or an XML document element, for XML plugins. // files, or an XML document element, for XML plugins.
_data: null, _data: null,
// The engine's data type. See data types (DATA_) defined above. // The engine's data type. See data types (DATA_) defined above.
@ -907,7 +895,7 @@ Engine.prototype = {
binaryInStream.setInputStream(fileInStream); binaryInStream.setInputStream(fileInStream);
var bytes = binaryInStream.readByteArray(binaryInStream.available()); var bytes = binaryInStream.readByteArray(binaryInStream.available());
this._data = sherlockBytesToData(bytes); this._data = bytes;
break; break;
default: default:
@ -983,7 +971,7 @@ Engine.prototype = {
aEngine._data = doc.documentElement; aEngine._data = doc.documentElement;
break; break;
case SEARCH_DATA_TEXT: case SEARCH_DATA_TEXT:
aEngine._data = sherlockBytesToData(aBytes); aEngine._data = aBytes;
break; break;
default: default:
onError(); onError();
@ -1445,7 +1433,34 @@ Engine.prototype = {
throw Cr.NS_ERROR_FAILURE; throw Cr.NS_ERROR_FAILURE;
} }
var searchSection = getSection(this._data, "search"); // First try converting our byte array using the default Sherlock encoding.
// If this fails, or if we find a sourceTextEncoding attribute, we need to
// reconvert the byte array using the specified encoding.
var sherlockLines, searchSection, sourceTextEncoding;
try {
sherlockLines = sherlockBytesToLines(this._data);
searchSection = getSection(sherlockLines, "search");
sourceTextEncoding = parseInt(searchSection["sourcetextencoding"]);
if (sourceTextEncoding) {
// Re-convert the bytes using the found sourceTextEncoding
sherlockLines = sherlockBytesToLines(this._data, sourceTextEncoding);
searchSection = getSection(sherlockLines, "search");
}
} catch (ex) {
// The conversion using the default charset failed. Remove any non-ascii
// bytes and try to find a sourceTextEncoding.
var asciiBytes = this._data.filter(function (n) {return !(0x80 & n);});
var asciiString = String.fromCharCode.apply(null, asciiBytes);
sherlockLines = asciiString.split(NEW_LINES).filter(isUsefulLine);
searchSection = getSection(sherlockLines, "search");
sourceTextEncoding = parseInt(searchSection["sourcetextencoding"]);
if (sourceTextEncoding) {
sherlockLines = sherlockBytesToLines(this._data, sourceTextEncoding);
searchSection = getSection(sherlockLines, "search");
} else
ERROR("Couldn't find a working charset", Cr.NS_ERROR_FAILURE);
}
LOG("_parseAsSherlock: Search section:\n" + searchSection.toSource()); LOG("_parseAsSherlock: Search section:\n" + searchSection.toSource());
this._name = searchSection["name"] || err("Missing name!"); this._name = searchSection["name"] || err("Missing name!");
@ -1457,7 +1472,7 @@ Engine.prototype = {
var method = (searchSection["method"] || "GET").toUpperCase(); var method = (searchSection["method"] || "GET").toUpperCase();
var template = searchSection["action"] || err("Missing action!"); var template = searchSection["action"] || err("Missing action!");
var inputs = getInputs(this._data); var inputs = getInputs(sherlockLines);
LOG("_parseAsSherlock: Inputs:\n" + inputs.toSource()); LOG("_parseAsSherlock: Inputs:\n" + inputs.toSource());
var url = null; var url = null;