зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1338373 - Update word boundary in RegExp with unicode and ignoreCase flags to include U+017F and U+212A. r=till
This commit is contained in:
Родитель
f11ca4d10d
Коммит
4bdc66dec5
|
@@ -2254,7 +2254,10 @@ void
|
||||||
BoyerMoorePositionInfo::SetInterval(const Interval& interval)
|
BoyerMoorePositionInfo::SetInterval(const Interval& interval)
|
||||||
{
|
{
|
||||||
s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
|
s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
|
||||||
w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
|
if (unicode_ignore_case_)
|
||||||
|
w_ = AddRange(w_, kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, interval);
|
||||||
|
else
|
||||||
|
w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
|
||||||
d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
|
d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
|
||||||
surrogate_ =
|
surrogate_ =
|
||||||
AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
|
AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
|
||||||
|
@@ -2291,11 +2294,12 @@ BoyerMoorePositionInfo::SetAll()
|
||||||
BoyerMooreLookahead::BoyerMooreLookahead(LifoAlloc* alloc, size_t length, RegExpCompiler* compiler)
|
BoyerMooreLookahead::BoyerMooreLookahead(LifoAlloc* alloc, size_t length, RegExpCompiler* compiler)
|
||||||
: length_(length), compiler_(compiler), bitmaps_(*alloc)
|
: length_(length), compiler_(compiler), bitmaps_(*alloc)
|
||||||
{
|
{
|
||||||
|
bool unicode_ignore_case = compiler->unicode() && compiler->ignore_case();
|
||||||
max_char_ = MaximumCharacter(compiler->ascii());
|
max_char_ = MaximumCharacter(compiler->ascii());
|
||||||
|
|
||||||
bitmaps_.reserve(length);
|
bitmaps_.reserve(length);
|
||||||
for (size_t i = 0; i < length; i++)
|
for (size_t i = 0; i < length; i++)
|
||||||
bitmaps_.append(alloc->newInfallible<BoyerMoorePositionInfo>(alloc));
|
bitmaps_.append(alloc->newInfallible<BoyerMoorePositionInfo>(alloc, unicode_ignore_case));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find the longest range of lookahead that has the fewest number of different
|
// Find the longest range of lookahead that has the fewest number of different
|
||||||
|
@@ -2961,15 +2965,22 @@ EmitNotInSurrogatePair(RegExpCompiler* compiler, RegExpNode* on_success, Trace*
|
||||||
// Check for [0-9A-Z_a-z].
|
// Check for [0-9A-Z_a-z].
|
||||||
static void
|
static void
|
||||||
EmitWordCheck(RegExpMacroAssembler* assembler,
|
EmitWordCheck(RegExpMacroAssembler* assembler,
|
||||||
jit::Label* word, jit::Label* non_word, bool fall_through_on_word)
|
jit::Label* word, jit::Label* non_word, bool fall_through_on_word,
|
||||||
|
bool unicode_ignore_case)
|
||||||
{
|
{
|
||||||
if (assembler->CheckSpecialCharacterClass(fall_through_on_word ? 'w' : 'W',
|
if (!unicode_ignore_case &&
|
||||||
|
assembler->CheckSpecialCharacterClass(fall_through_on_word ? 'w' : 'W',
|
||||||
fall_through_on_word ? non_word : word))
|
fall_through_on_word ? non_word : word))
|
||||||
{
|
{
|
||||||
// Optimized implementation available.
|
// Optimized implementation available.
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (unicode_ignore_case) {
|
||||||
|
assembler->CheckCharacter(0x017F, word);
|
||||||
|
assembler->CheckCharacter(0x212A, word);
|
||||||
|
}
|
||||||
|
|
||||||
assembler->CheckCharacterGT('z', non_word);
|
assembler->CheckCharacterGT('z', non_word);
|
||||||
assembler->CheckCharacterLT('0', non_word);
|
assembler->CheckCharacterLT('0', non_word);
|
||||||
assembler->CheckCharacterGT('a' - 1, word);
|
assembler->CheckCharacterGT('a' - 1, word);
|
||||||
|
@@ -3018,7 +3029,8 @@ AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace)
|
||||||
assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
|
assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
|
||||||
}
|
}
|
||||||
// Fall through on non-word.
|
// Fall through on non-word.
|
||||||
EmitWordCheck(assembler, &before_word, &before_non_word, false);
|
EmitWordCheck(assembler, &before_word, &before_non_word, false,
|
||||||
|
compiler->unicode() && compiler->ignore_case());
|
||||||
// Next character is not a word character.
|
// Next character is not a word character.
|
||||||
assembler->Bind(&before_non_word);
|
assembler->Bind(&before_non_word);
|
||||||
jit::Label ok;
|
jit::Label ok;
|
||||||
|
@@ -3058,7 +3070,8 @@ AssertionNode::BacktrackIfPrevious(RegExpCompiler* compiler,
|
||||||
// We already checked that we are not at the start of input so it must be
|
// We already checked that we are not at the start of input so it must be
|
||||||
// OK to load the previous character.
|
// OK to load the previous character.
|
||||||
assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
|
assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
|
||||||
EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
|
EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord,
|
||||||
|
compiler->unicode() && compiler->ignore_case());
|
||||||
|
|
||||||
assembler->Bind(&fall_through);
|
assembler->Bind(&fall_through);
|
||||||
on_success()->Emit(compiler, &new_trace);
|
on_success()->Emit(compiler, &new_trace);
|
||||||
|
|
|
@@ -1195,13 +1195,14 @@ AddRange(ContainedInLattice a,
|
||||||
class BoyerMoorePositionInfo
|
class BoyerMoorePositionInfo
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit BoyerMoorePositionInfo(LifoAlloc* alloc)
|
explicit BoyerMoorePositionInfo(LifoAlloc* alloc, bool unicode_ignore_case)
|
||||||
: map_(*alloc),
|
: map_(*alloc),
|
||||||
map_count_(0),
|
map_count_(0),
|
||||||
w_(kNotYet),
|
w_(kNotYet),
|
||||||
s_(kNotYet),
|
s_(kNotYet),
|
||||||
d_(kNotYet),
|
d_(kNotYet),
|
||||||
surrogate_(kNotYet)
|
surrogate_(kNotYet),
|
||||||
|
unicode_ignore_case_(unicode_ignore_case)
|
||||||
{
|
{
|
||||||
map_.reserve(kMapSize);
|
map_.reserve(kMapSize);
|
||||||
for (int i = 0; i < kMapSize; i++)
|
for (int i = 0; i < kMapSize; i++)
|
||||||
|
@@ -1228,6 +1229,9 @@ class BoyerMoorePositionInfo
|
||||||
ContainedInLattice s_; // The \s character class.
|
ContainedInLattice s_; // The \s character class.
|
||||||
ContainedInLattice d_; // The \d character class.
|
ContainedInLattice d_; // The \d character class.
|
||||||
ContainedInLattice surrogate_; // Surrogate UTF-16 code units.
|
ContainedInLattice surrogate_; // Surrogate UTF-16 code units.
|
||||||
|
|
||||||
|
// True if the RegExp has unicode and ignoreCase flags.
|
||||||
|
bool unicode_ignore_case_;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef InfallibleVector<BoyerMoorePositionInfo*, 1> BoyerMoorePositionInfoVector;
|
typedef InfallibleVector<BoyerMoorePositionInfo*, 1> BoyerMoorePositionInfoVector;
|
||||||
|
|
|
@@ -0,0 +1,25 @@
|
||||||
|
var BUGNUMBER = 1338373;
|
||||||
|
var summary = "Word boundary should match U+017F and U+212A in unicode+ignoreCase.";
|
||||||
|
|
||||||
|
assertEq(/\b/iu.test('\u017F'), true);
|
||||||
|
assertEq(/\b/i.test('\u017F'), false);
|
||||||
|
assertEq(/\b/u.test('\u017F'), false);
|
||||||
|
assertEq(/\b/.test('\u017F'), false);
|
||||||
|
|
||||||
|
assertEq(/\b/iu.test('\u212A'), true);
|
||||||
|
assertEq(/\b/i.test('\u212A'), false);
|
||||||
|
assertEq(/\b/u.test('\u212A'), false);
|
||||||
|
assertEq(/\b/.test('\u212A'), false);
|
||||||
|
|
||||||
|
assertEq(/\B/iu.test('\u017F'), false);
|
||||||
|
assertEq(/\B/i.test('\u017F'), true);
|
||||||
|
assertEq(/\B/u.test('\u017F'), true);
|
||||||
|
assertEq(/\B/.test('\u017F'), true);
|
||||||
|
|
||||||
|
assertEq(/\B/iu.test('\u212A'), false);
|
||||||
|
assertEq(/\B/i.test('\u212A'), true);
|
||||||
|
assertEq(/\B/u.test('\u212A'), true);
|
||||||
|
assertEq(/\B/.test('\u212A'), true);
|
||||||
|
|
||||||
|
if (typeof reportCompare === "function")
|
||||||
|
reportCompare(true, true);
|
Загрузка…
Ссылка в новой задаче