<regex>: Fixes regex character class case insensitive search problem (#1503)

Co-authored-by: Stephan T. Lavavej <stl@microsoft.com>
This commit is contained in:
Hamid Reza Arzaghi 2020-12-17 03:57:14 +03:30 коммит произвёл GitHub
Родитель d01fd4b73b
Коммит 628544cf61
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 105 добавлений и 5 удалений

Просмотреть файл

@ -3688,19 +3688,23 @@ _BidIt _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Skip(_BidIt _First_arg, _BidIt
case _N_class:
{ // check for string match
for (; _First_arg != _Last; ++_First_arg) { // look for starting match
using _Uelem = typename _RxTraits::_Uelem;
bool _Found;
auto _Ch = static_cast<typename _RxTraits::_Uelem>(*_First_arg);
auto _Ch = static_cast<_Uelem>(*_First_arg);
_Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Nx);
_It _Next = _First_arg;
++_Next;
if (_Sflags & regex_constants::icase) {
_Ch = static_cast<_Uelem>(_Traits.translate_nocase(static_cast<_Elem>(_Ch)));
}
if (_Node->_Coll && _Lookup_coll(_First_arg, _Next, _Node->_Coll) != _First_arg) {
_Found = true;
} else if (_Node->_Ranges
&& (_Lookup_range(
static_cast<typename _RxTraits::_Uelem>(
_Sflags & regex_constants::collate ? _Traits.translate(static_cast<_Elem>(_Ch))
: static_cast<_Elem>(_Ch)),
&& (_Lookup_range(static_cast<_Uelem>(_Sflags & regex_constants::collate
? _Traits.translate(static_cast<_Elem>(_Ch))
: static_cast<_Elem>(_Ch)),
_Node->_Ranges))) {
_Found = true;
} else if (_Ch < _Bmp_max) {

Просмотреть файл

@ -240,3 +240,64 @@ public:
}
}
};
// Test helper for wide-character regexes: compiles `pattern` once in the constructor and
// offers expectation checks built on std::regex_search. Every unmet expectation prints a
// diagnostic and is recorded through regex_fixture::fail_regex().
//
// Wide-string arguments are printed with `%ls`: that specifier denotes a wchar_t string under
// both the ISO C rules and the legacy MSVC wide-format rules, whereas plain `%s` in wprintf
// only means "wide string" in the legacy MSVC mode.
class test_wregex {
    regex_fixture* const fixture; // receives fail_regex() for each failed expectation
    const std::wstring pattern; // kept so diagnostics can show the pattern text
    const std::regex_constants::syntax_option_type syntax; // kept so diagnostics can show the flags
    const std::wregex r; // built from pattern and syntax; declared last so both are initialized first

    // Prints the diagnostic for a regex_error thrown while searching `subject` and records the failure.
    void report_search_error(const std::wstring& subject, const std::regex_constants::match_flag_type match_flags,
        const std::regex_error& e) const {
        wprintf(LR"(Failed to regex_search("%ls", regex("%ls", 0x%X), 0x%X): regex_error: )", subject.c_str(),
            pattern.c_str(), static_cast<unsigned int>(syntax), static_cast<unsigned int>(match_flags));
        printf("\"%s\"\n", e.what()); // what() is a narrow string, so it goes through narrow printf
        fixture->fail_regex();
    }

public:
    // Compiles `pattern` with `syntax`. The regex is constructed here, outside of the try blocks
    // used by the checking functions below.
    test_wregex(regex_fixture* fixture, const std::wstring& pattern,
        std::regex_constants::syntax_option_type syntax = std::regex_constants::ECMAScript)
        : fixture(fixture), pattern(pattern), syntax(syntax), r(pattern, syntax) {}

    test_wregex(const test_wregex&) = delete;
    test_wregex& operator=(const test_wregex&) = delete;

    // Expects regex_search(subject) to succeed with the whole match (mr[0]) equal to `expected`.
    // A failed search, a different match, or a thrown regex_error is reported and recorded.
    void should_search_match(const std::wstring& subject, const std::wstring& expected,
        const std::regex_constants::match_flag_type match_flags = std::regex_constants::match_default) const {
        std::wsmatch mr;
        try {
            const bool search_result = std::regex_search(subject, mr, r, match_flags);
            if (!search_result || mr[0] != expected) {
                wprintf(LR"(Expected regex_search("%ls", regex("%ls", 0x%X), 0x%X) to find "%ls", )", subject.c_str(),
                    pattern.c_str(), static_cast<unsigned int>(syntax), static_cast<unsigned int>(match_flags),
                    expected.c_str());
                if (search_result) {
                    // Something matched, just not the expected text.
                    wprintf(LR"(but it matched "%ls")"
                            L"\n",
                        mr.str().c_str());
                } else {
                    puts("but it failed to match");
                }
                fixture->fail_regex();
            }
        } catch (const std::regex_error& e) {
            report_search_error(subject, match_flags, e);
        }
    }

    // Expects regex_search(subject) to find nothing. Any match, or a thrown regex_error,
    // is reported and recorded.
    void should_search_fail(const std::wstring& subject,
        const std::regex_constants::match_flag_type match_flags = std::regex_constants::match_default) const {
        std::wsmatch mr;
        try {
            if (std::regex_search(subject, mr, r, match_flags)) {
                wprintf(LR"(Expected regex_search("%ls", regex("%ls", 0x%X), 0x%X) to not match, but it found "%ls")"
                        L"\n",
                    subject.c_str(), pattern.c_str(), static_cast<unsigned int>(syntax),
                    static_cast<unsigned int>(match_flags), mr.str().c_str());
                fixture->fail_regex();
            }
        } catch (const std::regex_error& e) {
            report_search_error(subject, match_flags, e);
        }
    }
};

Просмотреть файл

@ -546,6 +546,40 @@ void test_VSO_226914_word_boundaries() {
aWordAny.should_search_fail("aa", match_not_bow | match_not_eow);
}
void test_GH_993_regex_character_class_case_insensitive_search() {
{
const wstring subject = L" Copyright";
const test_wregex case_regex(&g_regexTester, LR"([a-z][a-z])", ECMAScript);
const test_wregex icase_regex(&g_regexTester, LR"([a-z][a-z])", ECMAScript | icase);
case_regex.should_search_match(subject, L"op");
icase_regex.should_search_match(subject, L"Co");
}
{
const wstring subject = L"blahZblah";
const test_wregex Z_case_regex(&g_regexTester, LR"([Z])", ECMAScript);
const test_wregex Z_icase_regex(&g_regexTester, LR"([Z])", ECMAScript | icase);
const test_wregex z_case_regex(&g_regexTester, LR"([z])", ECMAScript);
const test_wregex z_icase_regex(&g_regexTester, LR"([z])", ECMAScript | icase);
Z_case_regex.should_search_match(subject, L"Z");
Z_icase_regex.should_search_match(subject, L"Z");
z_icase_regex.should_search_match(subject, L"Z");
z_case_regex.should_search_fail(subject);
z_case_regex.should_search_fail(subject, match_not_bow);
z_case_regex.should_search_fail(subject, match_not_eow);
z_case_regex.should_search_fail(subject, match_not_bow | match_not_eow);
const wstring lowercase_subject = L"hungry_zombies";
Z_case_regex.should_search_fail(lowercase_subject);
Z_icase_regex.should_search_match(lowercase_subject, L"z");
z_case_regex.should_search_match(lowercase_subject, L"z");
z_icase_regex.should_search_match(lowercase_subject, L"z");
}
}
int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
@ -572,6 +606,7 @@ int main() {
test_VSO_225160_match_bol_flag();
test_VSO_225160_match_eol_flag();
test_VSO_226914_word_boundaries();
test_GH_993_regex_character_class_case_insensitive_search();
return g_regexTester.result();
}