From e7f3925bed86edf1b79fd18e5600252e445019d1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 19 Nov 2021 09:06:36 -0800 Subject: [PATCH] Revert "grep/pcre2: fix an edge case concerning ascii patterns and UTF-8 data" This reverts commit ae39ba431ab861548eb60b4bd2e1d8b8813db76f, as it breaks "grep" when looking for a string in non UTF-8 haystack, when linked with certain versions of PCREv2 library. Signed-off-by: Junio C Hamano --- grep.c | 6 ++--- t/t7812-grep-icase-non-ascii.sh | 48 --------------------------------- 2 files changed, 2 insertions(+), 52 deletions(-) diff --git a/grep.c b/grep.c index f6e113e9f0..fe847a0111 100644 --- a/grep.c +++ b/grep.c @@ -382,10 +382,8 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt } options |= PCRE2_CASELESS; } - if ((!opt->ignore_locale && !has_non_ascii(p->pattern)) || - (!opt->ignore_locale && is_utf8_locale() && - has_non_ascii(p->pattern) && !(!opt->ignore_case && - (p->fixed || p->is_fixed)))) + if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern) && + !(!opt->ignore_case && (p->fixed || p->is_fixed))) options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF); #ifdef GIT_PCRE2_VERSION_10_36_OR_HIGHER diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 22487d90fd..e5d1e4ea68 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -53,54 +53,6 @@ test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' ' test_cmp expected actual ' -test_expect_success GETTEXT_LOCALE,PCRE 'log --author with an ascii pattern on UTF-8 data' ' - cat >expected <<-\EOF && - Author: À Ú Thor - EOF - test_write_lines "forth" >file4 && - git add file4 && - git commit --author="À Ú Thor " -m sécond && - git log -1 --color=always --perl-regexp --author=".*Thor" >log && - grep Author log >actual.raw && - test_decode_color actual && - test_cmp expected actual -' - -test_expect_success GETTEXT_LOCALE,PCRE 'log --committer with an ascii pattern on ISO-8859-1 data' ' - cat >expected <<-\EOF && - Commit: Ç O Mîtter - EOF - test_write_lines "fifth" >file5 && - git add file5 && - GIT_COMMITTER_NAME="Ç O Mîtter" && - GIT_COMMITTER_EMAIL="committer@example.com" && - git -c i18n.commitEncoding=latin1 commit -m thïrd && - git -c i18n.logOutputEncoding=latin1 log -1 --pretty=fuller --color=always --perl-regexp --committer=" O.*" >log && - grep Commit: log >actual.raw && - test_decode_color actual && - test_cmp expected actual -' - -test_expect_success GETTEXT_LOCALE,PCRE 'log --grep with an ascii pattern on UTF-8 data' ' - cat >expected <<-\EOF && - sécond - EOF - git log -1 --color=always --perl-regexp --grep="con" >log && - grep con log >actual.raw && - test_decode_color actual && - test_cmp expected actual -' - -test_expect_success GETTEXT_LOCALE,PCRE 'log --grep with an ascii pattern on ISO-8859-1 data' ' - cat >expected <<-\EOF && - thïrd - EOF - git -c i18n.logOutputEncoding=latin1 log -1 --color=always --perl-regexp --grep="th.*rd" >log && - grep "th.*rd" log >actual.raw && - test_decode_color actual && - test_cmp expected actual -' - test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: setup invalid UTF-8 data' ' printf "\\200\\n" >invalid-0x80 && echo "ævar" >expected &&