Bug 576837: Conform to old invalid character-class range error. (r=Waldo)

This commit is contained in:
Chris Leary 2010-11-08 18:08:32 -08:00
Родитель e1639a9715
Коммит fec1549ee2
3 изменённых файлов: 77 добавлений и 33 удалений

Просмотреть файл

@ -0,0 +1,45 @@
/*
* Check that builtin character classes within ranges produce syntax
* errors.
*
* Note that, per the extension in bug 351463, SpiderMonkey permits hyphens
* adjacent to character class escapes in character classes, treating them as a
* hyphen pattern character. Therefore /[\d-\s]/ is okay.
*
* Note: inside a character class, \b is the backspace escape, which is a
* single pattern character, though it looks deceptively like a character
* class escape.
*/
/*
 * Report whether constructing a RegExp from |pattern| throws a SyntaxError.
 *
 * Returns true only when the constructor throws and the thrown value is a
 * SyntaxError instance. Successful construction, or any other kind of thrown
 * value, yields false.
 */
function isRegExpSyntaxError(pattern) {
    var threwSyntaxError = false;
    try {
        /* Only the act of construction matters; the object is discarded. */
        new RegExp(pattern);
    } catch (exc) {
        threwSyntaxError = exc instanceof SyntaxError;
    }
    return threwSyntaxError;
}
/*
 * Table of [pattern, expectSyntaxError] pairs, checked in order. Each pair
 * asserts that isRegExpSyntaxError(pattern) equals the expected flag.
 */
[
    /* A builtin class escape as the right-hand end of a range is an error. */
    ['[C-\\s]', true],
    ['[C-\\d]', true],
    ['[C-\\W]', true],

    /* A hyphen at either edge of the class, or a one-character range, is fine. */
    ['[C-]', false],
    ['[-C]', false],
    ['[C-C]', false],
    /* NOTE(review): same pattern as the previous entry -- possibly a different range was intended; confirm. */
    ['[C-C]', false],

    /*
     * \b is the backspace escape, a single pattern character, so it can bound
     * a range. The \B cases are expected to act as a single character too.
     * NOTE(review): '[\B-\b]' presumably fails because the range is out of
     * order rather than because of a class escape -- confirm.
     */
    ['[\\b-\\b]', false],
    ['[\\B-\\B]', false],
    ['[\\b-\\B]', false],
    ['[\\B-\\b]', true],
    ['[\\b-\\w]', true],
    ['[\\B-\\w]', true],

    /*
     * Extension: a hyphen that follows a class escape is treated as a hyphen
     * pattern character, so these patterns are accepted.
     */
    ['[\\s-\\s]', false],
    ['[\\W-\\s]', false],
    ['[\\s-\\W]', false],
    ['[\\W-C]', false],
    ['[\\d-C]', false],
    ['[\\s-C]', false],
    ['[\\w-\\b]', false],
    ['[\\w-\\B]', false]
].forEach(function (testCase) {
    assertEq(isRegExpSyntaxError(testCase[0]), testCase[1]);
});

Просмотреть файл

@ -58,6 +58,8 @@
#include "jsobjinlines.h"
#include "jsregexpinlines.h"
#include "yarr/RegexParser.h"
#ifdef JS_TRACER
#include "jstracer.h"
using namespace avmplus;
@ -184,26 +186,12 @@ js_ObjectIsRegExp(JSObject *obj)
void
RegExp::handleYarrError(JSContext *cx, int error)
{
/* Hack: duplicated from yarr/yarr/RegexParser.h */
enum ErrorCode {
NoError,
PatternTooLarge,
QuantifierOutOfOrder,
QuantifierWithoutAtom,
MissingParentheses,
ParenthesesUnmatched,
ParenthesesTypeInvalid, /* "(?" with bad next char or end of pattern. */
CharacterClassUnmatched,
CharacterClassOutOfOrder,
QuantifierTooLarge,
EscapeUnterminated
};
switch (error) {
case NoError:
case JSC::Yarr::NoError:
JS_NOT_REACHED("Precondition violation: an error must have occurred.");
return;
#define COMPILE_EMSG(__code, __msg) \
case __code: \
case JSC::Yarr::__code: \
JS_ReportErrorFlagsAndNumberUC(cx, JSREPORT_ERROR, js_GetErrorMessage, NULL, __msg); \
return
COMPILE_EMSG(PatternTooLarge, JSMSG_REGEXP_TOO_COMPLEX);
@ -211,9 +199,10 @@ RegExp::handleYarrError(JSContext *cx, int error)
COMPILE_EMSG(QuantifierWithoutAtom, JSMSG_BAD_QUANTIFIER);
COMPILE_EMSG(MissingParentheses, JSMSG_MISSING_PAREN);
COMPILE_EMSG(ParenthesesUnmatched, JSMSG_UNMATCHED_RIGHT_PAREN);
COMPILE_EMSG(ParenthesesTypeInvalid, JSMSG_BAD_QUANTIFIER);
COMPILE_EMSG(ParenthesesTypeInvalid, JSMSG_BAD_QUANTIFIER); /* "(?" with bad next char */
COMPILE_EMSG(CharacterClassUnmatched, JSMSG_BAD_CLASS_RANGE);
COMPILE_EMSG(CharacterClassOutOfOrder, JSMSG_BAD_CLASS_RANGE);
COMPILE_EMSG(CharacterClassRangeSingleChar, JSMSG_BAD_CLASS_RANGE);
COMPILE_EMSG(EscapeUnterminated, JSMSG_TRAILING_SLASH);
COMPILE_EMSG(QuantifierTooLarge, JSMSG_BAD_QUANTIFIER);
#undef COMPILE_EMSG

Просмотреть файл

@ -39,6 +39,22 @@ enum BuiltInCharacterClassID {
NewlineClassID
};
/*
 * Error codes produced while parsing a regular expression pattern.
 *
 * The enum is declared outside the Parser class so that code outside the
 * parser can name the codes directly (the SpiderMonkey error handler refers
 * to them as JSC::Yarr::<code>).
 *
 * NOTE(review): the codes travel as plain int (parse() returns int), so the
 * enumerator order determines the values callers see -- presumably new codes
 * must be added before NumberOfErrorCodes; confirm before reordering.
 */
enum ErrorCode {
NoError, // No error occurred.
PatternTooLarge,
QuantifierOutOfOrder,
QuantifierWithoutAtom,
MissingParentheses,
ParenthesesUnmatched,
ParenthesesTypeInvalid, // "(?" with a bad next character or end of pattern.
CharacterClassUnmatched,
CharacterClassOutOfOrder,
CharacterClassRangeSingleChar, // A built-in character class was used as the right-hand end of a range.
EscapeUnterminated,
QuantifierTooLarge,
NumberOfErrorCodes // Presumably a count sentinel rather than a real error -- TODO confirm.
};
// The Parser class should not be used directly - only via the Yarr::parse() method.
template<class Delegate>
class Parser {
@ -46,21 +62,6 @@ private:
template<class FriendDelegate>
friend int parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit);
enum ErrorCode {
NoError,
PatternTooLarge,
QuantifierOutOfOrder,
QuantifierWithoutAtom,
MissingParentheses,
ParenthesesUnmatched,
ParenthesesTypeInvalid,
CharacterClassUnmatched,
CharacterClassOutOfOrder,
EscapeUnterminated,
QuantifierTooLarge,
NumberOfErrorCodes
};
/*
* CharacterClassParserDelegate:
*
@ -147,6 +148,15 @@ private:
*/
void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
{
    // Common case: no range is pending, so flush() and forward the built-in
    // class to the delegate.
    if (m_state != cachedCharacterHyphen) {
        flush();
        m_delegate.atomCharacterClassBuiltIn(classID, invert);
        return;
    }

    // A built-in class is being used as the right-hand side of a range, as
    // in [c-\s]. The RHS of a range must be exactly one character, otherwise
    // a SyntaxError has to be raised; as an extension, SpiderMonkey errors
    // out only in this [c-\s] shape. (This relies on no built-in character
    // class consisting of a single character.)
    m_state = empty;
    m_err = CharacterClassRangeSingleChar;
}
@ -404,7 +414,7 @@ private:
/*
* parseCharacterClass():
*
* Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
* Helper for parseTokens(); calls directly and indirectly (via parseCharacterClassEscape)
* to an instance of CharacterClassParserDelegate, to describe the character class to the
* delegate.
*/