зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1135377 - Part 3: Parse RegExp unicode character in CharacterClass. r=till, f=anba
This commit is contained in:
Родитель
e2993e2972
Коммит
fec1e26e0b
|
@ -421,8 +421,8 @@ IsSpecialClassEscape(widechar c)
|
|||
#endif
|
||||
|
||||
template <typename CharT>
|
||||
widechar
|
||||
RegExpParser<CharT>::ParseClassCharacterEscape()
|
||||
bool
|
||||
RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
|
||||
{
|
||||
MOZ_ASSERT(current() == '\\');
|
||||
MOZ_ASSERT(has_next() && !IsSpecialClassEscape(Next()));
|
||||
|
@ -430,24 +430,30 @@ RegExpParser<CharT>::ParseClassCharacterEscape()
|
|||
switch (current()) {
|
||||
case 'b':
|
||||
Advance();
|
||||
return '\b';
|
||||
*code = '\b';
|
||||
return true;
|
||||
// ControlEscape :: one of
|
||||
// f n r t v
|
||||
case 'f':
|
||||
Advance();
|
||||
return '\f';
|
||||
*code = '\f';
|
||||
return true;
|
||||
case 'n':
|
||||
Advance();
|
||||
return '\n';
|
||||
*code = '\n';
|
||||
return true;
|
||||
case 'r':
|
||||
Advance();
|
||||
return '\r';
|
||||
*code = '\r';
|
||||
return true;
|
||||
case 't':
|
||||
Advance();
|
||||
return '\t';
|
||||
*code = '\t';
|
||||
return true;
|
||||
case 'v':
|
||||
Advance();
|
||||
return '\v';
|
||||
*code = '\v';
|
||||
return true;
|
||||
case 'c': {
|
||||
widechar controlLetter = Next();
|
||||
widechar letter = controlLetter & ~('A' ^ 'a');
|
||||
|
@ -459,35 +465,65 @@ RegExpParser<CharT>::ParseClassCharacterEscape()
|
|||
Advance(2);
|
||||
// Control letters mapped to ASCII control characters in the range
|
||||
// 0x00-0x1f.
|
||||
return controlLetter & 0x1f;
|
||||
*code = controlLetter & 0x1f;
|
||||
return true;
|
||||
}
|
||||
// We match JSC in reading the backslash as a literal
|
||||
// character instead of as starting an escape.
|
||||
return '\\';
|
||||
*code = '\\';
|
||||
return true;
|
||||
}
|
||||
case '0': case '1': case '2': case '3': case '4': case '5':
|
||||
case '6': case '7':
|
||||
// For compatibility, we interpret a decimal escape that isn't
|
||||
// a back reference (and therefore either \0 or not valid according
|
||||
// to the specification) as a 1..3 digit octal character code.
|
||||
return ParseOctalLiteral();
|
||||
*code = ParseOctalLiteral();
|
||||
return true;
|
||||
case 'x': {
|
||||
Advance();
|
||||
size_t value;
|
||||
if (ParseHexEscape(2, &value))
|
||||
return value;
|
||||
if (ParseHexEscape(2, &value)) {
|
||||
*code = value;
|
||||
return true;
|
||||
}
|
||||
// If \x is not followed by a two-digit hexadecimal, treat it
|
||||
// as an identity escape.
|
||||
return 'x';
|
||||
*code = 'x';
|
||||
return true;
|
||||
}
|
||||
case 'u': {
|
||||
Advance();
|
||||
size_t value;
|
||||
if (ParseHexEscape(4, &value))
|
||||
return value;
|
||||
// If \u is not followed by a four-digit hexadecimal, treat it
|
||||
if (unicode_) {
|
||||
if (current() == '{') {
|
||||
if (!ParseBracedHexEscape(&value))
|
||||
return false;
|
||||
*code = value;
|
||||
return true;
|
||||
}
|
||||
if (ParseHexEscape(4, &value)) {
|
||||
if (unicode::IsLeadSurrogate(value)) {
|
||||
size_t trail;
|
||||
if (ParseTrailSurrogate(&trail)) {
|
||||
*code = unicode::UTF16Decode(value, trail);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
*code = value;
|
||||
return true;
|
||||
}
|
||||
ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
|
||||
return false;
|
||||
}
|
||||
if (ParseHexEscape(4, &value)) {
|
||||
*code = value;
|
||||
return true;
|
||||
}
|
||||
// If \u is not followed by a four-digit or braced hexadecimal, treat it
|
||||
// as an identity escape.
|
||||
return 'u';
|
||||
*code = 'u';
|
||||
return true;
|
||||
}
|
||||
default: {
|
||||
// Extended identity escape. We accept any character that hasn't
|
||||
|
@ -495,27 +531,319 @@ RegExpParser<CharT>::ParseClassCharacterEscape()
|
|||
// by the ECMAScript specification.
|
||||
widechar result = current();
|
||||
Advance();
|
||||
return result;
|
||||
*code = result;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
class WideCharRange
|
||||
{
|
||||
public:
|
||||
WideCharRange()
|
||||
: from_(0), to_(0)
|
||||
{}
|
||||
|
||||
WideCharRange(widechar from, widechar to)
|
||||
: from_(from), to_(to)
|
||||
{}
|
||||
|
||||
static inline WideCharRange Singleton(widechar value) {
|
||||
return WideCharRange(value, value);
|
||||
}
|
||||
static inline WideCharRange Range(widechar from, widechar to) {
|
||||
MOZ_ASSERT(from <= to);
|
||||
return WideCharRange(from, to);
|
||||
}
|
||||
|
||||
widechar from() const { return from_; }
|
||||
widechar to() const { return to_; }
|
||||
|
||||
private:
|
||||
widechar from_;
|
||||
widechar to_;
|
||||
};
|
||||
|
||||
typedef Vector<WideCharRange, 1, LifoAllocPolicy<Infallible> > WideCharRangeVector;
|
||||
|
||||
static inline CharacterRange
|
||||
LeadSurrogateRange()
|
||||
{
|
||||
return CharacterRange::Range(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax);
|
||||
}
|
||||
|
||||
static inline CharacterRange
|
||||
TrailSurrogateRange()
|
||||
{
|
||||
return CharacterRange::Range(unicode::TrailSurrogateMin, unicode::TrailSurrogateMax);
|
||||
}
|
||||
|
||||
static inline WideCharRange
|
||||
NonBMPRange()
|
||||
{
|
||||
return WideCharRange::Range(unicode::NonBMPMin, unicode::NonBMPMax);
|
||||
}
|
||||
|
||||
static const char16_t kNoCharClass = 0;
|
||||
|
||||
// Adds range or pre-defined character class to character ranges.
|
||||
// Adds a character or pre-defined character class to character ranges.
|
||||
// If char_class is not kNoCharClass, it's interpreted as a class
|
||||
// escape (i.e., 's' means whitespace, from '\s').
|
||||
static inline void
|
||||
AddRangeOrEscape(LifoAlloc* alloc,
|
||||
CharacterRangeVector* ranges,
|
||||
char16_t char_class,
|
||||
CharacterRange range)
|
||||
AddCharOrEscape(LifoAlloc* alloc,
|
||||
CharacterRangeVector* ranges,
|
||||
char16_t char_class,
|
||||
widechar c)
|
||||
{
|
||||
if (char_class != kNoCharClass)
|
||||
CharacterRange::AddClassEscape(alloc, char_class, ranges);
|
||||
else
|
||||
ranges->append(range);
|
||||
ranges->append(CharacterRange::Singleton(c));
|
||||
}
|
||||
|
||||
static inline void
|
||||
AddCharOrEscapeUnicode(LifoAlloc* alloc,
|
||||
CharacterRangeVector* ranges,
|
||||
CharacterRangeVector* lead_ranges,
|
||||
CharacterRangeVector* trail_ranges,
|
||||
WideCharRangeVector* wide_ranges,
|
||||
char16_t char_class,
|
||||
widechar c)
|
||||
{
|
||||
if (char_class != kNoCharClass)
|
||||
CharacterRange::AddClassEscape(alloc, char_class, ranges);
|
||||
else if (unicode::IsLeadSurrogate(c))
|
||||
lead_ranges->append(CharacterRange::Singleton(c));
|
||||
else if (unicode::IsTrailSurrogate(c))
|
||||
trail_ranges->append(CharacterRange::Singleton(c));
|
||||
else if (c >= unicode::NonBMPMin)
|
||||
wide_ranges->append(WideCharRange::Singleton(c));
|
||||
else
|
||||
ranges->append(CharacterRange::Singleton(c));
|
||||
}
|
||||
|
||||
static inline void
|
||||
AddUnicodeRange(LifoAlloc* alloc,
|
||||
CharacterRangeVector* ranges,
|
||||
CharacterRangeVector* lead_ranges,
|
||||
CharacterRangeVector* trail_ranges,
|
||||
WideCharRangeVector* wide_ranges,
|
||||
widechar first,
|
||||
widechar next)
|
||||
{
|
||||
MOZ_ASSERT(first <= next);
|
||||
if (first < unicode::LeadSurrogateMin) {
|
||||
if (next < unicode::LeadSurrogateMin) {
|
||||
ranges->append(CharacterRange::Range(first, next));
|
||||
return;
|
||||
}
|
||||
ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMin - 1));
|
||||
first = unicode::LeadSurrogateMin;
|
||||
}
|
||||
if (first <= unicode::LeadSurrogateMax) {
|
||||
if (next <= unicode::LeadSurrogateMax) {
|
||||
lead_ranges->append(CharacterRange::Range(first, next));
|
||||
return;
|
||||
}
|
||||
lead_ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMax));
|
||||
first = unicode::LeadSurrogateMax + 1;
|
||||
}
|
||||
MOZ_ASSERT(unicode::LeadSurrogateMax + 1 == unicode::TrailSurrogateMin);
|
||||
if (first <= unicode::TrailSurrogateMax) {
|
||||
if (next <= unicode::TrailSurrogateMax) {
|
||||
trail_ranges->append(CharacterRange::Range(first, next));
|
||||
return;
|
||||
}
|
||||
trail_ranges->append(CharacterRange::Range(first, unicode::TrailSurrogateMax));
|
||||
first = unicode::TrailSurrogateMax + 1;
|
||||
}
|
||||
if (first <= unicode::UTF16Max) {
|
||||
if (next <= unicode::UTF16Max) {
|
||||
ranges->append(CharacterRange::Range(first, next));
|
||||
return;
|
||||
}
|
||||
ranges->append(CharacterRange::Range(first, unicode::UTF16Max));
|
||||
first = unicode::NonBMPMin;
|
||||
}
|
||||
MOZ_ASSERT(unicode::UTF16Max + 1 == unicode::NonBMPMin);
|
||||
wide_ranges->append(WideCharRange::Range(first, next));
|
||||
}
|
||||
|
||||
// Negate a vector of ranges by subtracting its ranges from a range
|
||||
// encompassing the full range of possible values.
|
||||
template <typename RangeType>
|
||||
static inline void
|
||||
NegateUnicodeRanges(LifoAlloc* alloc, Vector<RangeType, 1, LifoAllocPolicy<Infallible> >** ranges,
|
||||
RangeType full_range)
|
||||
{
|
||||
typedef Vector<RangeType, 1, LifoAllocPolicy<Infallible> > RangeVector;
|
||||
RangeVector* tmp_ranges = alloc->newInfallible<RangeVector>(*alloc);
|
||||
tmp_ranges->append(full_range);
|
||||
RangeVector* result_ranges = alloc->newInfallible<RangeVector>(*alloc);
|
||||
|
||||
// Perform the following calculation:
|
||||
// result_ranges = tmp_ranges - ranges
|
||||
// with the following steps:
|
||||
// result_ranges = tmp_ranges - ranges[0]
|
||||
// SWAP(result_ranges, tmp_ranges)
|
||||
// result_ranges = tmp_ranges - ranges[1]
|
||||
// SWAP(result_ranges, tmp_ranges)
|
||||
// ...
|
||||
// result_ranges = tmp_ranges - ranges[N-1]
|
||||
// SWAP(result_ranges, tmp_ranges)
|
||||
// The last SWAP is just for simplicity of the loop.
|
||||
for (size_t i = 0; i < (*ranges)->length(); i++) {
|
||||
result_ranges->clear();
|
||||
|
||||
const RangeType& range = (**ranges)[i];
|
||||
for (size_t j = 0; j < tmp_ranges->length(); j++) {
|
||||
const RangeType& tmpRange = (*tmp_ranges)[j];
|
||||
size_t from1 = tmpRange.from();
|
||||
size_t to1 = tmpRange.to();
|
||||
size_t from2 = range.from();
|
||||
size_t to2 = range.to();
|
||||
|
||||
if (from1 < from2) {
|
||||
if (to1 < from2) {
|
||||
result_ranges->append(tmpRange);
|
||||
} else if (to1 <= to2) {
|
||||
result_ranges->append(RangeType::Range(from1, from2 - 1));
|
||||
} else {
|
||||
result_ranges->append(RangeType::Range(from1, from2 - 1));
|
||||
result_ranges->append(RangeType::Range(to2 + 1, to1));
|
||||
}
|
||||
} else if (from1 <= to2) {
|
||||
if (to1 > to2)
|
||||
result_ranges->append(RangeType::Range(to2 + 1, to1));
|
||||
} else {
|
||||
result_ranges->append(tmpRange);
|
||||
}
|
||||
}
|
||||
|
||||
auto tmp = tmp_ranges;
|
||||
tmp_ranges = result_ranges;
|
||||
result_ranges = tmp;
|
||||
}
|
||||
|
||||
// After the loop, result is pointed at by tmp_ranges, instead of
|
||||
// result_ranges.
|
||||
*ranges = tmp_ranges;
|
||||
}
|
||||
|
||||
static RegExpTree*
|
||||
UnicodeRangesAtom(LifoAlloc* alloc,
|
||||
CharacterRangeVector* ranges,
|
||||
CharacterRangeVector* lead_ranges,
|
||||
CharacterRangeVector* trail_ranges,
|
||||
WideCharRangeVector* wide_ranges,
|
||||
bool is_negated)
|
||||
{
|
||||
if (is_negated) {
|
||||
NegateUnicodeRanges(alloc, &lead_ranges, LeadSurrogateRange());
|
||||
NegateUnicodeRanges(alloc, &trail_ranges, TrailSurrogateRange());
|
||||
NegateUnicodeRanges(alloc, &wide_ranges, NonBMPRange());
|
||||
}
|
||||
|
||||
RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
|
||||
|
||||
bool added = false;
|
||||
|
||||
if (is_negated) {
|
||||
ranges->append(LeadSurrogateRange());
|
||||
ranges->append(TrailSurrogateRange());
|
||||
}
|
||||
if (ranges->length() > 0) {
|
||||
builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated));
|
||||
added = true;
|
||||
}
|
||||
|
||||
if (lead_ranges->length() > 0) {
|
||||
if (added)
|
||||
builder->NewAlternative();
|
||||
builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(lead_ranges, false));
|
||||
builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
|
||||
unicode::TrailSurrogateMax));
|
||||
added = true;
|
||||
}
|
||||
|
||||
if (trail_ranges->length() > 0) {
|
||||
if (added)
|
||||
builder->NewAlternative();
|
||||
builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
|
||||
RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
|
||||
builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(trail_ranges, false));
|
||||
added = true;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < wide_ranges->length(); i++) {
|
||||
if (added)
|
||||
builder->NewAlternative();
|
||||
|
||||
const WideCharRange& range = (*wide_ranges)[i];
|
||||
widechar from = range.from();
|
||||
widechar to = range.to();
|
||||
size_t from_lead, from_trail;
|
||||
size_t to_lead, to_trail;
|
||||
|
||||
unicode::UTF16Encode(from, &from_lead, &from_trail);
|
||||
if (from == to) {
|
||||
builder->AddCharacter(from_lead);
|
||||
builder->AddCharacter(from_trail);
|
||||
} else {
|
||||
unicode::UTF16Encode(to, &to_lead, &to_trail);
|
||||
if (from_lead == to_lead) {
|
||||
MOZ_ASSERT(from_trail != to_trail);
|
||||
builder->AddCharacter(from_lead);
|
||||
builder->AddAtom(RangeAtom(alloc, from_trail, to_trail));
|
||||
} else if (from_trail == unicode::TrailSurrogateMin &&
|
||||
to_trail == unicode::TrailSurrogateMax)
|
||||
{
|
||||
builder->AddAtom(RangeAtom(alloc, from_lead, to_lead));
|
||||
builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
|
||||
unicode::TrailSurrogateMax));
|
||||
} else if (from_lead + 1 == to_lead) {
|
||||
builder->AddCharacter(from_lead);
|
||||
builder->AddAtom(RangeAtom(alloc, from_trail, unicode::TrailSurrogateMax));
|
||||
|
||||
builder->NewAlternative();
|
||||
|
||||
builder->AddCharacter(to_lead);
|
||||
builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, to_trail));
|
||||
} else if (from_lead + 2 == to_lead) {
|
||||
builder->AddCharacter(from_lead);
|
||||
builder->AddAtom(RangeAtom(alloc, from_trail, unicode::TrailSurrogateMax));
|
||||
|
||||
builder->NewAlternative();
|
||||
|
||||
builder->AddCharacter(from_lead + 1);
|
||||
builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
|
||||
unicode::TrailSurrogateMax));
|
||||
|
||||
builder->NewAlternative();
|
||||
|
||||
builder->AddCharacter(to_lead);
|
||||
builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, to_trail));
|
||||
} else {
|
||||
builder->AddCharacter(from_lead);
|
||||
builder->AddAtom(RangeAtom(alloc, from_trail, unicode::TrailSurrogateMax));
|
||||
|
||||
builder->NewAlternative();
|
||||
|
||||
builder->AddAtom(RangeAtom(alloc, from_lead + 1, to_lead - 1));
|
||||
builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
|
||||
unicode::TrailSurrogateMax));
|
||||
|
||||
builder->NewAlternative();
|
||||
|
||||
builder->AddCharacter(to_lead);
|
||||
builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, to_trail));
|
||||
}
|
||||
}
|
||||
added = true;
|
||||
}
|
||||
|
||||
return builder->ToRegExp();
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
|
@ -530,9 +858,19 @@ RegExpParser<CharT>::ParseCharacterClass()
|
|||
Advance();
|
||||
}
|
||||
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
|
||||
CharacterRangeVector* lead_ranges = nullptr;
|
||||
CharacterRangeVector* trail_ranges = nullptr;
|
||||
WideCharRangeVector* wide_ranges = nullptr;
|
||||
|
||||
if (unicode_) {
|
||||
lead_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
|
||||
trail_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
|
||||
wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
|
||||
}
|
||||
|
||||
while (has_more() && current() != ']') {
|
||||
char16_t char_class = kNoCharClass;
|
||||
CharacterRange first;
|
||||
widechar first = 0;
|
||||
if (!ParseClassAtom(&char_class, &first))
|
||||
return nullptr;
|
||||
if (current() == '-') {
|
||||
|
@ -542,41 +880,68 @@ RegExpParser<CharT>::ParseCharacterClass()
|
|||
// following code report an error.
|
||||
break;
|
||||
} else if (current() == ']') {
|
||||
AddRangeOrEscape(alloc, ranges, char_class, first);
|
||||
if (unicode_) {
|
||||
AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
|
||||
char_class, first);
|
||||
} else {
|
||||
AddCharOrEscape(alloc, ranges, char_class, first);
|
||||
}
|
||||
ranges->append(CharacterRange::Singleton('-'));
|
||||
break;
|
||||
}
|
||||
char16_t char_class_2 = kNoCharClass;
|
||||
CharacterRange next;
|
||||
widechar next = 0;
|
||||
if (!ParseClassAtom(&char_class_2, &next))
|
||||
return nullptr;
|
||||
if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
|
||||
if (unicode_)
|
||||
return ReportError(JSMSG_RANGE_WITH_CLASS_ESCAPE);
|
||||
|
||||
// Either end is an escaped character class. Treat the '-' verbatim.
|
||||
AddRangeOrEscape(alloc, ranges, char_class, first);
|
||||
AddCharOrEscape(alloc, ranges, char_class, first);
|
||||
ranges->append(CharacterRange::Singleton('-'));
|
||||
AddRangeOrEscape(alloc, ranges, char_class_2, next);
|
||||
AddCharOrEscape(alloc, ranges, char_class_2, next);
|
||||
continue;
|
||||
}
|
||||
if (first.from() > next.to())
|
||||
if (first > next)
|
||||
return ReportError(JSMSG_BAD_CLASS_RANGE);
|
||||
ranges->append(CharacterRange::Range(first.from(), next.to()));
|
||||
if (unicode_)
|
||||
AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges,wide_ranges, first, next);
|
||||
else
|
||||
ranges->append(CharacterRange::Range(first, next));
|
||||
} else {
|
||||
AddRangeOrEscape(alloc, ranges, char_class, first);
|
||||
if (unicode_) {
|
||||
AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
|
||||
char_class, first);
|
||||
} else {
|
||||
AddCharOrEscape(alloc, ranges, char_class, first);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!has_more())
|
||||
return ReportError(JSMSG_UNTERM_CLASS);
|
||||
Advance();
|
||||
if (ranges->length() == 0) {
|
||||
ranges->append(CharacterRange::Everything());
|
||||
is_negated = !is_negated;
|
||||
if (!unicode_) {
|
||||
if (ranges->length() == 0) {
|
||||
ranges->append(CharacterRange::Everything());
|
||||
is_negated = !is_negated;
|
||||
}
|
||||
return alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated);
|
||||
}
|
||||
return alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated);
|
||||
|
||||
if (!is_negated && ranges->length() == 0 && lead_ranges->length() == 0 &&
|
||||
trail_ranges->length() == 0 && wide_ranges->length() == 0)
|
||||
{
|
||||
ranges->append(CharacterRange::Everything());
|
||||
return alloc->newInfallible<RegExpCharacterClass>(ranges, true);
|
||||
}
|
||||
|
||||
return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, is_negated);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
bool
|
||||
RegExpParser<CharT>::ParseClassAtom(char16_t* char_class, CharacterRange* char_range)
|
||||
RegExpParser<CharT>::ParseClassAtom(char16_t* char_class, widechar* value)
|
||||
{
|
||||
MOZ_ASSERT(*char_class == kNoCharClass);
|
||||
widechar first = current();
|
||||
|
@ -590,13 +955,20 @@ RegExpParser<CharT>::ParseClassAtom(char16_t* char_class, CharacterRange* char_r
|
|||
case kEndMarker:
|
||||
return ReportError(JSMSG_ESCAPE_AT_END_OF_REGEXP);
|
||||
default:
|
||||
widechar c = ParseClassCharacterEscape();
|
||||
*char_range = CharacterRange::Singleton(c);
|
||||
if (!ParseClassCharacterEscape(value))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (unicode_) {
|
||||
char16_t lead, trail;
|
||||
if (ParseRawSurrogatePair(&lead, &trail)) {
|
||||
*value = unicode::UTF16Decode(lead, trail);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
Advance();
|
||||
*char_range = CharacterRange::Singleton(first);
|
||||
*value = first;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -185,9 +185,11 @@ class RegExpParser
|
|||
// out parameters.
|
||||
bool ParseIntervalQuantifier(int* min_out, int* max_out);
|
||||
|
||||
// Parses and returns a single escaped character. The character
|
||||
// must not be 'b' or 'B' since they are usually handled specially.
|
||||
widechar ParseClassCharacterEscape();
|
||||
// Tries to parse the input as a single escaped character. If successful
|
||||
// it stores the result in the output parameter and returns true.
|
||||
// Otherwise it throws an error and returns false. The character must not
|
||||
// be 'b' or 'B' since they are usually handled specially.
|
||||
bool ParseClassCharacterEscape(widechar* code);
|
||||
|
||||
// Checks whether the following is a length-digit hexadecimal number,
|
||||
// and sets the value if it is.
|
||||
|
@ -205,7 +207,7 @@ class RegExpParser
|
|||
// can be reparsed.
|
||||
bool ParseBackReferenceIndex(int* index_out);
|
||||
|
||||
bool ParseClassAtom(char16_t* char_class, CharacterRange* char_range);
|
||||
bool ParseClassAtom(char16_t* char_class, widechar *value);
|
||||
RegExpTree* ReportError(unsigned errorNumber);
|
||||
void Advance();
|
||||
void Advance(int dist) {
|
||||
|
|
|
@ -453,6 +453,7 @@ MSG_DEF(JSMSG_MISSING_PAREN, 0, JSEXN_SYNTAXERR, "unterminated parenth
|
|||
MSG_DEF(JSMSG_NEWREGEXP_FLAGGED, 0, JSEXN_TYPEERR, "can't supply flags when constructing one RegExp from another")
|
||||
MSG_DEF(JSMSG_NOTHING_TO_REPEAT, 0, JSEXN_SYNTAXERR, "nothing to repeat")
|
||||
MSG_DEF(JSMSG_NUMBERS_OUT_OF_ORDER, 0, JSEXN_SYNTAXERR, "numbers out of order in {} quantifier.")
|
||||
MSG_DEF(JSMSG_RANGE_WITH_CLASS_ESCAPE, 0, JSEXN_SYNTAXERR, "character class escape cannot be used in class range in regular expression")
|
||||
MSG_DEF(JSMSG_TOO_MANY_PARENS, 0, JSEXN_INTERNALERR, "too many parentheses in regular expression")
|
||||
MSG_DEF(JSMSG_UNICODE_OVERFLOW, 0, JSEXN_SYNTAXERR, "unicode codepoint should not be greater than 0x10FFFF in regular expression")
|
||||
MSG_DEF(JSMSG_UNMATCHED_RIGHT_PAREN, 0, JSEXN_SYNTAXERR, "unmatched ) in regular expression")
|
||||
|
|
|
@ -0,0 +1,236 @@
|
|||
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- braced pattern in RegExpUnicodeEscapeSequence in CharacterClass.";
|
||||
|
||||
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
// ==== standalone ====
|
||||
|
||||
assertEqArray(/[\u{41}]/u.exec("ABC"),
|
||||
["A"]);
|
||||
|
||||
assertEqArray(/[\u{1F438}]/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEq(/[\u{1F438}]/u.exec("\uD83D"),
|
||||
null);
|
||||
assertEq(/[\u{1F438}]/u.exec("\uDC38"),
|
||||
null);
|
||||
|
||||
assertEqArray(/[\u{0}]/u.exec("\u{0}"),
|
||||
["\u{0}"]);
|
||||
assertEqArray(/[\u{10FFFF}]/u.exec("\u{10FFFF}"),
|
||||
["\u{10FFFF}"]);
|
||||
assertEqArray(/[\u{10ffff}]/u.exec("\u{10FFFF}"),
|
||||
["\u{10FFFF}"]);
|
||||
|
||||
// leading 0
|
||||
assertEqArray(/[\u{0000000000000000000000}]/u.exec("\u{0}"),
|
||||
["\u{0}"]);
|
||||
assertEqArray(/[\u{000000000000000010FFFF}]/u.exec("\u{10FFFF}"),
|
||||
["\u{10FFFF}"]);
|
||||
|
||||
// RegExp constructor
|
||||
assertEqArray(new RegExp("[\\u{0}]", "u").exec("\u{0}"),
|
||||
["\u{0}"]);
|
||||
assertEqArray(new RegExp("[\\u{41}]", "u").exec("ABC"),
|
||||
["A"]);
|
||||
assertEqArray(new RegExp("[\\u{1F438}]", "u").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(new RegExp("[\\u{10FFFF}]", "u").exec("\u{10FFFF}"),
|
||||
["\u{10FFFF}"]);
|
||||
|
||||
assertEqArray(new RegExp("[\\u{0000000000000000}]", "u").exec("\u{0}"),
|
||||
["\u{0}"]);
|
||||
|
||||
assertEqArray(eval(`/[\\u{${"0".repeat(Math.pow(2, 24)) + "1234"}}]/u`).exec("\u{1234}"),
|
||||
["\u{1234}"]);
|
||||
assertEqArray(new RegExp(`[\\u{${"0".repeat(Math.pow(2, 24)) + "1234"}}]`, "u").exec("\u{1234}"),
|
||||
["\u{1234}"]);
|
||||
|
||||
// ==== BMP + non-BMP ====
|
||||
|
||||
assertEqArray(/[A\u{1F438}]/u.exec("A\u{1F438}"),
|
||||
["A"]);
|
||||
assertEqArray(/[A\u{1F438}]/u.exec("\u{1F438}A"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// lead-only target
|
||||
assertEqArray(/[A\u{1F438}]/u.exec("\uD83DA"),
|
||||
["A"]);
|
||||
assertEq(/[A\u{1F438}]/u.exec("\uD83D"),
|
||||
null);
|
||||
|
||||
// +
|
||||
assertEqArray(/[A\u{1F438}]+/u.exec("\u{1F438}A\u{1F438}A"),
|
||||
["\u{1F438}A\u{1F438}A"]);
|
||||
|
||||
// trail surrogate + lead surrogate
|
||||
assertEqArray(/[A\u{1F438}]+/u.exec("\uD83D\uDC38A\uDC38\uD83DA"),
|
||||
["\uD83D\uDC38A"]);
|
||||
|
||||
// ==== non-BMP + non-BMP ====
|
||||
|
||||
assertEqArray(/[\u{1F418}\u{1F438}]/u.exec("\u{1F418}\u{1F438}"),
|
||||
["\u{1F418}"]);
|
||||
|
||||
assertEqArray(/[\u{1F418}\u{1F438}]+/u.exec("\u{1F418}\u{1F438}"),
|
||||
["\u{1F418}\u{1F438}"]);
|
||||
assertEqArray(/[\u{1F418}\u{1F438}]+/u.exec("\u{1F418}\uDC38\uD83D"),
|
||||
["\u{1F418}"]);
|
||||
assertEqArray(/[\u{1F418}\u{1F438}]+/u.exec("\uDC18\uD83D\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/[\u{1F418}\u{1F438}]+/u.exec("\uDC18\u{1F438}\uD83D"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// trail surrogate + lead surrogate
|
||||
assertEq(/[\u{1F418}\u{1F438}]+/u.exec("\uDC18\uDC38\uD83D\uD83D"),
|
||||
null);
|
||||
|
||||
// ==== non-BMP + non-BMP range (from_lead == to_lead) ====
|
||||
|
||||
assertEqArray(/[\u{1F418}-\u{1F438}]/u.exec("\u{1F418}"),
|
||||
["\u{1F418}"]);
|
||||
assertEqArray(/[\u{1F418}-\u{1F438}]/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/[\u{1F418}-\u{1F438}]/u.exec("\u{1F427}"),
|
||||
["\u{1F427}"]);
|
||||
|
||||
assertEq(/[\u{1F418}-\u{1F438}]/u.exec("\u{1F417}"),
|
||||
null);
|
||||
assertEq(/[\u{1F418}-\u{1F438}]/u.exec("\u{1F439}"),
|
||||
null);
|
||||
|
||||
// ==== non-BMP + non-BMP range (from_lead + 1 == to_lead) ====
|
||||
|
||||
assertEqArray(/[\u{1F17C}-\u{1F438}]/u.exec("\uD83C\uDD7C"),
|
||||
["\uD83C\uDD7C"]);
|
||||
assertEqArray(/[\u{1F17C}-\u{1F438}]/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/[\u{1F17C}-\u{1F438}]/u.exec("\uD83C\uDF99"),
|
||||
["\uD83C\uDF99"]);
|
||||
assertEqArray(/[\u{1F17C}-\u{1F438}]/u.exec("\uD83D\uDC00"),
|
||||
["\uD83D\uDC00"]);
|
||||
|
||||
assertEq(/[\u{1F17C}-\u{1F438}]/u.exec("\uD83C\uDD7B"),
|
||||
null);
|
||||
assertEq(/[\u{1F17C}-\u{1F438}]/u.exec("\uD83C\uE000"),
|
||||
null);
|
||||
assertEq(/[\u{1F17C}-\u{1F438}]/u.exec("\uD83D\uDB99"),
|
||||
null);
|
||||
assertEq(/[\u{1F17C}-\u{1F438}]/u.exec("\uD83D\uDC39"),
|
||||
null);
|
||||
|
||||
// ==== non-BMP + non-BMP range (from_lead + 2 == to_lead) ====
|
||||
|
||||
assertEqArray(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83C\uDD7C"),
|
||||
["\uD83C\uDD7C"]);
|
||||
assertEqArray(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83E\uDC29"),
|
||||
["\uD83E\uDC29"]);
|
||||
|
||||
assertEqArray(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83C\uDF99"),
|
||||
["\uD83C\uDF99"]);
|
||||
assertEqArray(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83D\uDC00"),
|
||||
["\uD83D\uDC00"]);
|
||||
assertEqArray(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83D\uDF99"),
|
||||
["\uD83D\uDF99"]);
|
||||
assertEqArray(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83E\uDC00"),
|
||||
["\uD83E\uDC00"]);
|
||||
|
||||
assertEq(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83C\uDD7B"),
|
||||
null);
|
||||
assertEq(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83C\uE000"),
|
||||
null);
|
||||
assertEq(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83D\uDB99"),
|
||||
null);
|
||||
assertEq(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83D\uE000"),
|
||||
null);
|
||||
assertEq(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83E\uDB99"),
|
||||
null);
|
||||
assertEq(/[\u{1F17C}-\u{1F829}]/u.exec("\uD83E\uDC30"),
|
||||
null);
|
||||
|
||||
// ==== non-BMP + non-BMP range (other) ====
|
||||
|
||||
assertEqArray(/[\u{1D164}-\u{1F438}]/u.exec("\uD834\uDD64"),
|
||||
["\uD834\uDD64"]);
|
||||
assertEqArray(/[\u{1D164}-\u{1F438}]/u.exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/[\u{1D164}-\u{1F438}]/u.exec("\uD836\uDF99"),
|
||||
["\uD836\uDF99"]);
|
||||
assertEqArray(/[\u{1D164}-\u{1F438}]/u.exec("\uD838\uDC00"),
|
||||
["\uD838\uDC00"]);
|
||||
|
||||
assertEq(/[\u{1D164}-\u{1F438}]/u.exec("\uD834\uDD63"),
|
||||
null);
|
||||
assertEq(/[\u{1D164}-\u{1F438}]/u.exec("\uD83D\uDC39"),
|
||||
null);
|
||||
|
||||
assertEq(/[\u{1D164}-\u{1F438}]/u.exec("\uD834\uE000"),
|
||||
null);
|
||||
assertEq(/[\u{1D164}-\u{1F438}]/u.exec("\uD835\uDB99"),
|
||||
null);
|
||||
assertEq(/[\u{1D164}-\u{1F438}]/u.exec("\uD83C\uE000"),
|
||||
null);
|
||||
assertEq(/[\u{1D164}-\u{1F438}]/u.exec("\uD83D\uDB99"),
|
||||
null);
|
||||
|
||||
// ==== BMP + non-BMP range ====
|
||||
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("B"),
|
||||
["B"]);
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("C"),
|
||||
["C"]);
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uFFFF"),
|
||||
["\uFFFF"]);
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uD800\uDC00"),
|
||||
["\uD800\uDC00"]);
|
||||
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uD800"),
|
||||
["\uD800"]);
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uDBFF"),
|
||||
["\uDBFF"]);
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uDC00"),
|
||||
["\uDC00"]);
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uDFFF"),
|
||||
["\uDFFF"]);
|
||||
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uD83D\uDBFF"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uD83D\uDC00"),
|
||||
["\uD83D\uDC00"]);
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uD83D\uDC38"),
|
||||
["\uD83D\uDC38"]);
|
||||
assertEq(/[\u{42}-\u{1F438}]/u.exec("\uD83D\uDC39"),
|
||||
null);
|
||||
assertEq(/[\u{42}-\u{1F438}]/u.exec("\uD83D\uDFFF"),
|
||||
null);
|
||||
assertEqArray(/[\u{42}-\u{1F438}]/u.exec("\uD83D\uE000"),
|
||||
["\uD83D"]);
|
||||
|
||||
assertEq(/[\u{42}-\u{1F438}]/u.exec("A"),
|
||||
null);
|
||||
|
||||
// ==== wrong patterns ====
|
||||
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{-1}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{0.0}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{G}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{{]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{110000}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{00110000}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{100000000000000000000000000000}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{ FFFF}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{FFFF }]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{FF FF}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{F F F F}]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u{100000001}]/u`), SyntaxError);
|
||||
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
|
@ -0,0 +1,25 @@
|
|||
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- empty class should not match anything.";
|
||||
|
||||
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
assertEq(/[]/u.exec("A"),
|
||||
null);
|
||||
assertEq(/[]/u.exec("\uD83D"),
|
||||
null);
|
||||
assertEq(/[]/u.exec("\uDC38"),
|
||||
null);
|
||||
assertEq(/[]/u.exec("\uD83D\uDC38"),
|
||||
null);
|
||||
|
||||
assertEqArray(/[^]/u.exec("A"),
|
||||
["A"]);
|
||||
assertEqArray(/[^]/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[^]/u.exec("\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/[^]/u.exec("\uD83D\uDC38"),
|
||||
["\uD83D\uDC38"]);
|
||||
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
|
@ -0,0 +1,142 @@
|
|||
// Regression test for bug 1135377: behavior of \uXXXX escapes that denote
// surrogate lead/trail code units inside a character class, with and without
// the unicode flag.
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- lead and trail pattern in RegExpUnicodeEscapeSequence in CharacterClass.";
|
||||
|
||||
// Announce which bug and summary this script covers.
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
// ==== standalone ====
// With /u, an escaped lead followed by an escaped trail acts as one class
// member: it matches the full pair but neither half on its own.
|
||||
|
||||
assertEqArray(/[\uD83D\uDC38]/u.exec("\uD83D\uDC38"),
|
||||
["\uD83D\uDC38"]);
|
||||
assertEq(/[\uD83D\uDC38]/u.exec("\uD83D"),
|
||||
null);
|
||||
assertEq(/[\uD83D\uDC38]/u.exec("\uDC38"),
|
||||
null);
|
||||
|
||||
// no unicode flag: the class holds two independent code units, so either
// half matches alone and the first match on the full pair is just the lead.
|
||||
assertEqArray(/[\uD83D\uDC38]/.exec("\uD83D\uDC38"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83D\uDC38]/.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83D\uDC38]/.exec("\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// RegExp constructor: the single-backslash escapes are resolved by the JS
// string literal, so the pattern source contains the surrogate code units
// themselves. Expected results are the same as for the literal form above.
|
||||
assertEqArray(new RegExp("[\uD83D\uDC38]", "u").exec("\uD83D\uDC38"),
|
||||
["\uD83D\uDC38"]);
|
||||
assertEq(new RegExp("[\uD83D\uDC38]", "u").exec("\uD83D"),
|
||||
null);
|
||||
assertEq(new RegExp("[\uD83D\uDC38]", "u").exec("\uDC38"),
|
||||
null);
|
||||
|
||||
// RegExp constructor, no unicode flag
|
||||
assertEqArray(new RegExp("[\uD83D\uDC38]", "").exec("\uD83D\uDC38"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(new RegExp("[\uD83D\uDC38]", "").exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(new RegExp("[\uD83D\uDC38]", "").exec("\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// ==== lead-only ====
// A class containing only an escaped lead surrogate.
|
||||
|
||||
// With /u the lone lead matches a lead in the input only when that lead is
// not half of a surrogate pair. The followers probe the edges of the trail
// range: \uDBFF and \uE000 lie just outside it (match), while \uDC00 and
// \uDFFF are trails (no match).
|
||||
assertEqArray(/[\uD83D]/u.exec("\uD83D\uDBFF"),
|
||||
["\uD83D"]);
|
||||
assertEq(/[\uD83D]/u.exec("\uD83D\uDC00"),
|
||||
null);
|
||||
assertEq(/[\uD83D]/u.exec("\uD83D\uDFFF"),
|
||||
null);
|
||||
assertEqArray(/[\uD83D]/u.exec("\uD83D\uE000"),
|
||||
["\uD83D"]);
|
||||
|
||||
// The lead also matches at end of input or before a non-trail char.
|
||||
assertEqArray(/[\uD83D]/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83D]/u.exec("\uD83DA"),
|
||||
["\uD83D"]);
|
||||
|
||||
// no unicode flag: the lead code unit matches whatever follows it.
|
||||
assertEqArray(/[\uD83D]/.exec("\uD83D\uDBFF"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83D]/.exec("\uD83D\uDC00"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83D]/.exec("\uD83D\uDFFF"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83D]/.exec("\uD83D\uE000"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83D]/.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83D]/.exec("\uD83DA"),
|
||||
["\uD83D"]);
|
||||
|
||||
// ==== trail-only ====
// A class containing only an escaped trail surrogate.
|
||||
|
||||
// With /u the lone trail matches a trail in the input only when that trail
// is not half of a surrogate pair. The preceding chars probe the edges of the
// lead range: \uD7FF and \uDC00 lie just outside it (match), while \uD800 and
// \uDBFF are leads (no match).
|
||||
assertEqArray(/[\uDC38]/u.exec("\uD7FF\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEq(/[\uDC38]/u.exec("\uD800\uDC38"),
|
||||
null);
|
||||
assertEq(/[\uDC38]/u.exec("\uDBFF\uDC38"),
|
||||
null);
|
||||
assertEqArray(/[\uDC38]/u.exec("\uDC00\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// The trail also matches at start of input or after a non-lead char.
|
||||
assertEqArray(/[\uDC38]/u.exec("\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/[\uDC38]/u.exec("A\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// no unicode flag: the trail code unit matches whatever precedes it.
|
||||
assertEqArray(/[\uDC38]/.exec("\uD7FF\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/[\uDC38]/.exec("\uD800\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/[\uDC38]/.exec("\uDBFF\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/[\uDC38]/.exec("\uDC00\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/[\uDC38]/.exec("\uDC38"),
|
||||
["\uDC38"]);
|
||||
assertEqArray(/[\uDC38]/.exec("A\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// ==== invalid trail ====
// An escaped lead followed in the class by something that is not a trail
// stays a separate class member, so with `*` any mix of the lone lead and the
// other member is consumed in full.
|
||||
|
||||
// Follower written as a \uXXXX escape.
assertEqArray(/[\uD83D\u3042]*/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83D\u3042]*/u.exec("\uD83D\u3042"),
|
||||
["\uD83D\u3042"]);
|
||||
assertEqArray(/[\uD83D\u3042]*/u.exec("\uD83D\u3042\u3042\uD83D"),
|
||||
["\uD83D\u3042\u3042\uD83D"]);
|
||||
|
||||
// Follower written as a braced \u{...} escape.
assertEqArray(/[\uD83D\u{3042}]*/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83D\u{3042}]*/u.exec("\uD83D\u3042"),
|
||||
["\uD83D\u3042"]);
|
||||
assertEqArray(/[\uD83D\u{3042}]*/u.exec("\uD83D\u3042\u3042\uD83D"),
|
||||
["\uD83D\u3042\u3042\uD83D"]);
|
||||
|
||||
// Follower written as a literal character.
assertEqArray(/[\uD83DA]*/u.exec("\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[\uD83DA]*/u.exec("\uD83DA"),
|
||||
["\uD83DA"]);
|
||||
assertEqArray(/[\uD83DA]*/u.exec("\uD83DAA\uD83D"),
|
||||
["\uD83DAA\uD83D"]);
|
||||
|
||||
// ==== wrong patterns ====
// Malformed \u escapes inside a class must throw SyntaxError under /u.
// Each pattern is compiled through eval so the error is thrown inside the
// callback that assertThrowsInstanceOf invokes; `\\u` in the template literal
// puts a single backslash into the evaluated source.
|
||||
|
||||
// Missing, too few, or non-hex digits after \u.
assertThrowsInstanceOf(() => eval(`/[\\u]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u0]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u00]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u000]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u000G]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\u0.00]/u`), SyntaxError);
|
||||
// The same malformed escapes placed directly after a valid lead escape.
assertThrowsInstanceOf(() => eval(`/[\\uD83D\\u]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uD83D\\u0]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uD83D\\u00]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uD83D\\u000]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uD83D\\u000G]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uD83D\\u0.00]/u`), SyntaxError);
|
||||
|
||||
// Report success only when the harness defines reportCompare.
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
|
@ -0,0 +1,64 @@
|
|||
// Regression test for bug 1135377: negated character classes under the
// unicode flag.
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- negated CharacterClass.";
|
||||
|
||||
// Announce which bug and summary this script covers.
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
// ==== BMP ====
// Negating a BMP char: the first match is whatever follows the excluded
// char -- a BMP char, a whole non-BMP code point, or a lone surrogate.
|
||||
|
||||
// Excluded char is the ASCII letter "A".
assertEqArray(/[^A]/u.exec("ABC"),
|
||||
["B"]);
|
||||
assertEqArray(/[^A]/u.exec("A\u{1F438}C"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/[^A]/u.exec("A\uD83DC"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[^A]/u.exec("A\uDC38C"),
|
||||
["\uDC38"]);
|
||||
|
||||
// Excluded char is \uE000, written as an escape.
assertEqArray(/[^\uE000]/u.exec("\uE000\uE001"),
|
||||
["\uE001"]);
|
||||
assertEqArray(/[^\uE000]/u.exec("\uE000\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/[^\uE000]/u.exec("\uE000\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[^\uE000]/u.exec("\uE000\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// ==== non-BMP ====
// Negating a non-BMP code point: the excluded code point at the start of the
// input is skipped as a whole, and the match is what follows it -- a BMP
// char, a different non-BMP code point, or a lone surrogate.
|
||||
|
||||
assertEqArray(/[^\u{1F438}]/u.exec("\u{1F438}A"),
|
||||
["A"]);
|
||||
assertEqArray(/[^\u{1F438}]/u.exec("\u{1F438}\u{1F439}"),
|
||||
["\u{1F439}"]);
|
||||
assertEqArray(/[^\u{1F438}]/u.exec("\u{1F438}\uD83D"),
|
||||
["\uD83D"]);
|
||||
assertEqArray(/[^\u{1F438}]/u.exec("\u{1F438}\uDC38"),
|
||||
["\uDC38"]);
|
||||
|
||||
// ==== lead-only ====
// Negating a lone lead under /u. When the input lead is followed by a trail
// the two form one code point, which is not the excluded lone lead and is
// matched whole. When it is followed by a non-trail, the lone lead is skipped
// and the following char is the match.
|
||||
|
||||
assertEqArray(/[^\uD83D]/u.exec("\u{1F438}A"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/[^\uD83D]/u.exec("\uD83D\uDBFF"),
|
||||
["\uDBFF"]);
|
||||
assertEqArray(/[^\uD83D]/u.exec("\uD83D\uDC00"),
|
||||
["\uD83D\uDC00"]);
|
||||
assertEqArray(/[^\uD83D]/u.exec("\uD83D\uDFFF"),
|
||||
["\uD83D\uDFFF"]);
|
||||
assertEqArray(/[^\uD83D]/u.exec("\uD83D\uE000"),
|
||||
["\uE000"]);
|
||||
|
||||
// ==== trail-only ====
// Negating a lone trail under /u. When the input trail is preceded by a lead
// the two form one code point, which is not the excluded lone trail and is
// matched whole. When it is preceded by a non-lead, that preceding char is
// the first match.
|
||||
|
||||
assertEqArray(/[^\uDC38]/u.exec("\u{1F438}A"),
|
||||
["\u{1F438}"]);
|
||||
assertEqArray(/[^\uDC38]/u.exec("\uD7FF\uDC38"),
|
||||
["\uD7FF"]);
|
||||
assertEqArray(/[^\uDC38]/u.exec("\uD800\uDC38"),
|
||||
["\uD800\uDC38"]);
|
||||
assertEqArray(/[^\uDC38]/u.exec("\uDBFF\uDC38"),
|
||||
["\uDBFF\uDC38"]);
|
||||
assertEqArray(/[^\uDC38]/u.exec("\uDC00\uDC38"),
|
||||
["\uDC00"]);
|
||||
|
||||
// Report success only when the harness defines reportCompare.
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
|
@ -0,0 +1,28 @@
|
|||
// Regression test for bug 1135377: under the unicode flag, a class range whose
// endpoint is a character class escape (\w \W \d \D \s \S) is a SyntaxError.
// Each pattern is compiled through eval so the error is thrown inside the
// callback that assertThrowsInstanceOf invokes; `\\` in the template literal
// puts a single backslash into the evaluated source.
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- disallow range with CharacterClassEscape.";
|
||||
|
||||
// Announce which bug and summary this script covers.
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
// Class escape as the start of the range.
assertThrowsInstanceOf(() => eval(`/[\\w-\\uFFFF]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\W-\\uFFFF]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\d-\\uFFFF]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\D-\\uFFFF]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\s-\\uFFFF]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\S-\\uFFFF]/u`), SyntaxError);
|
||||
|
||||
// Class escape as the end of the range.
assertThrowsInstanceOf(() => eval(`/[\\uFFFF-\\w]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uFFFF-\\W]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uFFFF-\\d]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uFFFF-\\D]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uFFFF-\\s]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\uFFFF-\\S]/u`), SyntaxError);
|
||||
|
||||
// Class escape on both ends of the range.
assertThrowsInstanceOf(() => eval(`/[\\w-\\w]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\W-\\W]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\d-\\d]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\D-\\D]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\s-\\s]/u`), SyntaxError);
|
||||
assertThrowsInstanceOf(() => eval(`/[\\S-\\S]/u`), SyntaxError);
|
||||
|
||||
// Report success only when the harness defines reportCompare.
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
|
@ -0,0 +1,65 @@
|
|||
// Regression test for bug 1135377: surrogate code units that appear raw
// (unescaped) in the pattern source of a character class, alone or mixed with
// escaped ones.
//
// In the template literals below a single-backslash `\uXXXX` is resolved by
// the template itself, so eval sees the raw code unit; a double-backslash
// `\\uXXXX` leaves an escape sequence in the evaluated regexp source.
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- raw unicode.";
|
||||
|
||||
// Announce which bug and summary this script covers.
print(BUGNUMBER + ": " + summary);
|
||||
|
||||
// ==== standalone ====
|
||||
|
||||
// Raw lead + raw trail under /u form one code point and match U+1F438 whole.
assertEqArray(eval(`/[\uD83D\uDC38]/u`).exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// no unicode flag: only the lead code unit is returned as the first match.
|
||||
assertEqArray(eval(`/[\uD83D\uDC38]/`).exec("\u{1F438}"),
|
||||
["\uD83D"]);
|
||||
|
||||
// escaped (lead): an escaped lead next to a raw trail does not combine into
// one code point, so U+1F438 is not matched.
|
||||
assertEq(eval(`/[\\uD83D\uDC38]/u`).exec("\u{1F438}"),
|
||||
null);
|
||||
assertEq(eval(`/[\\u{D83D}\uDC38]/u`).exec("\u{1F438}"),
|
||||
null);
|
||||
|
||||
// escaped (trail): likewise for a raw lead next to an escaped trail.
|
||||
assertEq(eval(`/[\uD83D\\uDC38]/u`).exec("\u{1F438}"),
|
||||
null);
|
||||
assertEq(eval(`/[\uD83D\\u{DC38}]/u`).exec("\u{1F438}"),
|
||||
null);
|
||||
|
||||
// escaped (lead), no unicode flag
|
||||
assertEqArray(eval(`/[\\uD83D\uDC38]/`).exec("\u{1F438}"),
|
||||
["\uD83D"]);
|
||||
|
||||
// escaped (trail), no unicode flag
|
||||
assertEqArray(eval(`/[\uD83D\\uDC38]/`).exec("\u{1F438}"),
|
||||
["\uD83D"]);
|
||||
|
||||
// ==== RegExp constructor ====
// Raw versus escaped surrogates in a pattern passed to the RegExp constructor.
// In the string literals below a single-backslash `\uXXXX` is resolved by the
// string itself, so the pattern source holds the raw code unit; a
// double-backslash `\\uXXXX` leaves an escape sequence in the pattern source.
|
||||
|
||||
// Raw lead + raw trail under /u form one code point and match U+1F438 whole.
assertEqArray(new RegExp("[\uD83D\uDC38]", "u").exec("\u{1F438}"),
|
||||
["\u{1F438}"]);
|
||||
|
||||
// no unicode flag: only the lead code unit is returned as the first match.
|
||||
assertEqArray(new RegExp("[\uD83D\uDC38]", "").exec("\u{1F438}"),
|
||||
["\uD83D"]);
|
||||
|
||||
// escaped (lead): an escaped lead next to a raw trail does not combine into
// one code point, so U+1F438 is not matched.
|
||||
assertEq(new RegExp("[\\uD83D\uDC38]", "u").exec("\u{1F438}"),
|
||||
null);
|
||||
assertEq(new RegExp("[\\u{D83D}\uDC38]", "u").exec("\u{1F438}"),
|
||||
null);
|
||||
|
||||
// escaped (trail): likewise for a raw lead next to an escaped trail.
|
||||
assertEq(new RegExp("[\uD83D\\uDC38]", "u").exec("\u{1F438}"),
|
||||
null);
|
||||
assertEq(new RegExp("[\uD83D\\u{DC38}]", "u").exec("\u{1F438}"),
|
||||
null);
|
||||
|
||||
// escaped (lead), no unicode flag
|
||||
assertEqArray(new RegExp("[\\uD83D\uDC38]", "").exec("\u{1F438}"),
|
||||
["\uD83D"]);
|
||||
|
||||
// escaped (trail), no unicode flag
|
||||
assertEqArray(new RegExp("[\uD83D\\uDC38]", "").exec("\u{1F438}"),
|
||||
["\uD83D"]);
|
||||
|
||||
// Report success only when the harness defines reportCompare.
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
|
@ -29,11 +29,11 @@ namespace js {
|
|||
*
|
||||
* https://developer.mozilla.org/en-US/docs/SpiderMonkey/Internals/Bytecode
|
||||
*/
|
||||
static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 335;
|
||||
static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 336;
|
||||
static const uint32_t XDR_BYTECODE_VERSION =
|
||||
uint32_t(0xb973c0de - XDR_BYTECODE_VERSION_SUBTRAHEND);
|
||||
|
||||
static_assert(JSErr_Limit == 424,
|
||||
static_assert(JSErr_Limit == 425,
|
||||
"GREETINGS, POTENTIAL SUBTRAHEND INCREMENTER! If you added or "
|
||||
"removed MSG_DEFs from js.msg, you should increment "
|
||||
"XDR_BYTECODE_VERSION_SUBTRAHEND and update this assertion's "
|
||||
|
|
Загрузка…
Ссылка в новой задаче