зеркало из https://github.com/microsoft/git.git
Merge branch 'cb/grep-pcre-ucp'
"grep -P" learned to use Unicode Character Property to grok character classes when processing \b and \w etc. * cb/grep-pcre-ucp: grep: correctly identify utf-8 characters with \{b,w} in -P
This commit is contained in:
Коммит
557d93a146
2
grep.c
2
grep.c
|
@ -293,7 +293,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
|
|||
options |= PCRE2_CASELESS;
|
||||
}
|
||||
if (!opt->ignore_locale && is_utf8_locale() && !literal)
|
||||
options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF);
|
||||
options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF);
|
||||
|
||||
#ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER
|
||||
/* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
#!/bin/sh

test_description="git-grep's perl regex

If GIT_PERF_GREP_THREADS is set to a list of threads (e.g. '1 4 8'
etc.) we will test the patterns under those numbers of threads.
"

. ./perf-lib.sh

test_perf_large_repo
test_checkout_worktree

# Only run the per-thread-count variants when the caller asked for them.
if test -n "$GIT_PERF_GREP_THREADS"
then
	test_set_prereq PERF_GREP_ENGINES_THREADS
fi

# Patterns exercising \b, \w and \d next to non-ASCII (UTF-8) characters.
# They are kept single-quoted with single backslashes and written out with
# printf below, so the pattern file holds exactly the text listed here.
for pattern in \
	'\bhow' \
	'\bÆvar' \
	'\d+ \bÆvar' \
	'\bBelón\b' \
	'\w{12}\b'
do
	# printf '%s\n' writes the pattern verbatim; echo's treatment of
	# backslashes differs between shells, and single-quoting the
	# variable would write the literal text "$pattern" instead.
	printf '%s\n' "$pattern" >pat
	if ! test_have_prereq PERF_GREP_ENGINES_THREADS
	then
		# -P must follow "grep": "git -P" is "git --no-pager" and would
		# leave grep using its default (non-PCRE) pattern type.
		# "|| :" keeps a non-zero grep exit status from failing the run.
		test_perf "grep -P '$pattern'" --prereq PCRE "
			git grep -P -f pat || :
		"
	else
		for threads in $GIT_PERF_GREP_THREADS
		do
			test_perf "grep -P '$pattern' with $threads threads" --prereq PTHREADS,PCRE "
				git -c grep.threads=$threads grep -P -f pat || :
			"
		done
	fi
done

test_done
|
Загрузка…
Ссылка в новой задаче