From 6d423dd542f062a9696c210c33cf33e0657ab923 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 25 May 2017 20:05:22 +0000 Subject: [PATCH 1/8] grep: don't redundantly compile throwaway patterns under threading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the pattern compilation logic under threading so that grep doesn't compile a pattern it never ends up using on the non-threaded code path, only to compile it again N times for N threads which will each use their own copy, ignoring the initially compiled pattern. This redundant compilation dates back to the initial introduction of the threaded grep in commit 5b594f457a ("Threaded grep", 2010-01-25). There was never any reason for doing this redundant work other than an oversight in the initial commit. Jeff King suggested on-list in <20170414212325.fefrl3qdjigwyitd@sigill.intra.peff.net> that this might be needed to check the pattern for sanity before threaded execution commences. That's not the case. The pattern is compiled under threading in start_threads() before any concurrent execution has started by calling pthread_create(), so if the pattern contains an error we still do the right thing. I.e. die with one error before any threaded execution has commenced, instead of e.g. spewing out an error for each N threads, which could be a regression a change like this might inadvertently introduce. This change is not meant as an optimization, any performance gains from this are in the hundreds to thousands of nanoseconds at most. If we wanted more performance here we could just re-use the compiled patterns in multiple threads (regcomp(3) is thread-safe), or partially re-use them and the associated structures in the case of later PCRE JIT changes. Rather, it's just to make the code easier to reason about. It's confusing to debug this under threading & non-threading when the threading codepaths redundantly compile a pattern which is never used. The reason the patterns are recompiled is as a side-effect of duplicating the whole grep_opt structure, which is not thread safe, writable, and munged during execution. The grep_opt structure then points to the grep_pat structure where pattern or patterns are stored. I looked into e.g. splitting the API into some "do & alloc threadsafe stuff", "spawn thread", "do and alloc non-threadsafe stuff", but the execution time of grep_opt_dup() & pattern compilation is trivial compared to actually executing the grep, so there was no point. Even with the more expensive JIT changes to follow the most expensive PCRE patterns take something like 0.0X milliseconds to compile at most[1]. The undocumented --debug mode added in commit 17bf35a3c7 ("grep: teach --debug option to dump the parse tree", 2012-09-13) still works properly with this change. It only emits debugging info during pattern compilation, which is now dumped by the pattern compiled just before the first thread is started. 1. http://sljit.sourceforge.net/pcre.html Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/grep.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index b1095362fb..12e62fcbf3 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -224,7 +224,8 @@ static void start_threads(struct grep_opt *opt) int err; struct grep_opt *o = grep_opt_dup(opt); o->output = strbuf_out; - o->debug = 0; + if (i) + o->debug = 0; compile_grep_patterns(o); err = pthread_create(&threads[i], NULL, run, o); @@ -1167,8 +1168,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.fixed && opt.ignore_case) opt.regflags |= REG_ICASE; - compile_grep_patterns(&opt); - /* * We have to find "--" in a separate pass, because its presence * influences how we will parse arguments that come before it. @@ -1245,6 +1244,15 @@ int cmd_grep(int argc, const char **argv, const char *prefix) num_threads = 0; #endif + if (!num_threads) + /* + * The compiled patterns on the main path are only + * used when not using threading. Otherwise + * start_threads() below calls compile_grep_patterns() + * for each thread. + */ + compile_grep_patterns(&opt); + #ifndef NO_PTHREADS if (num_threads) { if (!(opt.name_only || opt.unmatch_name_only || opt.count) From 9ec726a4120bb219530faf988198a704ec7dd1f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 25 May 2017 20:05:23 +0000 Subject: [PATCH 2/8] grep: skip pthreads overhead when using one thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip the administrative overhead of using pthreads when only using one thread. Instead take the non-threaded path which would be taken under NO_PTHREADS. The threading support was initially added in commit 5b594f457a ("Threaded grep", 2010-01-25) with a hardcoded compile-time number of 8 threads. Later the number of threads was made configurable in commit 89f09dd34e ("grep: add --threads= option and grep.threads configuration", 2015-12-15). That change did not add any special handling for --threads=1. Now we take a slightly faster path by skipping thread handling entirely when 1 thread is requested. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/grep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/grep.c b/builtin/grep.c index 12e62fcbf3..bd008cb100 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1238,6 +1238,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) num_threads = GREP_NUM_THREADS_DEFAULT; else if (num_threads < 0) die(_("invalid number of threads specified (%d)"), num_threads); + if (num_threads == 1) + num_threads = 0; #else if (num_threads) warning(_("no threads support, ignoring --threads")); From 7531a2dd876994479484968d361e18b70b2c764e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 25 May 2017 20:05:24 +0000 Subject: [PATCH 3/8] log: add -P as a synonym for --perl-regexp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a short -P option as a synonym for the longer --perl-regexp, for consistency with the options the corresponding grep invocations accept. This was intentionally omitted in commit 727b6fc3ed ("log --grep: accept --basic-regexp and --perl-regexp", 2012-10-03) for unspecified future use. Make it consistent with "grep" rather than to keep it open for future use, and to avoid the confusion of -P meaning different things for grep & log, as is the case with the -G option. As noted in the aforementioned commit the --basic-regexp option can't have a corresponding -G argument, as the log command already uses that for -G. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.txt | 1 + revision.c | 2 +- t/t4202-log.sh | 12 ++++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index a46f70c2b1..9c44eae55d 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -91,6 +91,7 @@ endif::git-rev-list[] Consider the limiting patterns to be fixed strings (don't interpret pattern as a regular expression). +-P:: --perl-regexp:: Consider the limiting patterns to be Perl-compatible regular expressions. diff --git a/revision.c b/revision.c index c96265d89d..dea4d841b5 100644 --- a/revision.c +++ b/revision.c @@ -1996,7 +1996,7 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE); } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) { revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_FIXED; - } else if (!strcmp(arg, "--perl-regexp")) { + } else if (!strcmp(arg, "--perl-regexp") || !strcmp(arg, "-P")) { revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_PCRE; } else if (!strcmp(arg, "--all-match")) { revs->grep_filter.all_match = 1; diff --git a/t/t4202-log.sh b/t/t4202-log.sh index dbed3efeee..2b07d1c0c2 100755 --- a/t/t4202-log.sh +++ b/t/t4202-log.sh @@ -404,8 +404,20 @@ test_expect_success 'log with various grep.patternType configurations & command- --grep="(1|2)" >actual.fixed.short-arg && git log --pretty=tformat:%s -E \ --grep="\|2" >actual.extended.short-arg && + if test_have_prereq PCRE + then + git log --pretty=tformat:%s -P \ + --grep="[\d]\|" >actual.perl.short-arg + else + test_must_fail git log -P \ + --grep="[\d]\|" + fi && test_cmp expect.fixed actual.fixed.short-arg && test_cmp expect.extended actual.extended.short-arg && + if test_have_prereq PCRE + then + test_cmp expect.perl actual.perl.short-arg + fi && git log --pretty=tformat:%s --fixed-strings \ --grep="(1|2)" >actual.fixed.long-arg && From fbaceaac4731655540e48845eb3a3e2ce2282b38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 25 May 2017 20:05:25 +0000 Subject: [PATCH 4/8] grep: add support for the PCRE v1 JIT API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the grep PCRE v1 code to use JIT when available. When PCRE support was initially added in commit 63e7e9d8b6 ("git-grep: Learn PCRE", 2011-05-09) PCRE had no JIT support, it was integrated into 8.20 released on 2011-10-21. Enabling JIT support usually improves performance by more than 40%. The pattern compilation times are relatively slower, but those relative numbers are tiny, and are easily made back in all but the most trivial cases of grep. Detailed benchmarks & overview of compilation times is at: http://sljit.sourceforge.net/pcre.html With this change the difference in a t/perf/p7820-grep-engines.sh run is, with just the /perl/ tests shown: $ GIT_PERF_REPEAT_COUNT=30 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_OPTS='-j8 USE_LIBPCRE=YesPlease CC=~/perl5/installed/bin/gcc NO_R_TO_GCC_LINKER=YesPlease CFLAGS=-O3 LIBPCREDIR=/home/avar/g/pcre/inst LDFLAGS=-Wl,-rpath,/home/avar/g/pcre/inst/lib' ./run HEAD~ HEAD p7820-grep-engines.sh Test HEAD~ HEAD --------------------------------------------------------------------------------------- 7820.3: perl grep 'how.to' 0.35(1.11+0.43) 0.23(0.42+0.46) -34.3% 7820.7: perl grep '^how to' 0.64(2.71+0.36) 0.27(0.66+0.44) -57.8% 7820.11: perl grep '[how] to' 0.63(2.51+0.42) 0.33(0.98+0.39) -47.6% 7820.15: perl grep '(e.t[^ ]*|v.ry) rare' 1.17(5.61+0.35) 0.34(1.08+0.46) -70.9% 7820.19: perl grep 'm(ú|u)lt.b(æ|y)te' 0.43(1.52+0.44) 0.30(0.88+0.42) -30.2% The conditional support for JIT is implemented as suggested in the pcrejit(3) man page. E.g. defining PCRE_STUDY_JIT_COMPILE to 0 if it's not present. The implementation is relatively verbose because even if PCRE_CONFIG_JIT is defined only a call to pcre_config() can determine if the JIT is available, and if so the faster pcre_jit_exec() function should be called instead of pcre_exec(), and a different (but not complimentary!) function needs to be called to free pcre1_extra_info. There's no graceful fallback if pcre_jit_stack_alloc() fails under PCRE_CONFIG_JIT, instead the program will simply abort. I don't think this is worth handling gracefully, it'll only fail in cases where malloc() doesn't work, in which case we're screwed anyway. That there's no assignment of `p->pcre1_jit_on = 0` when PCRE_CONFIG_JIT isn't defined isn't a bug. The create_grep_pat() function allocates the grep_pat allocates it with calloc(), so it's guaranteed to be 0 when PCRE_CONFIG_JIT isn't defined. I you're bisecting and find this change, check that your PCRE isn't older than 8.32. This change intentionally broke really old versions of PCRE, but that's fixed in follow-up commits. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 40 ++++++++++++++++++++++++++++++++++++---- grep.h | 6 ++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/grep.c b/grep.c index d03d424e5c..ffe95995ee 100644 --- a/grep.c +++ b/grep.c @@ -365,9 +365,22 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) if (!p->pcre1_regexp) compile_regexp_failed(p, error); - p->pcre1_extra_info = pcre_study(p->pcre1_regexp, 0, &error); + p->pcre1_extra_info = pcre_study(p->pcre1_regexp, PCRE_STUDY_JIT_COMPILE, &error); if (!p->pcre1_extra_info && error) die("%s", error); + +#ifdef PCRE_CONFIG_JIT + pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on); + if (p->pcre1_jit_on == 1) { + p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024); + if (!p->pcre1_jit_stack) + die("Couldn't allocate PCRE JIT stack"); + pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack); + } else if (p->pcre1_jit_on != 0) { + die("BUG: The pcre1_jit_on variable should be 0 or 1, not %d", + p->pcre1_jit_on); + } +#endif } static int pcre1match(struct grep_pat *p, const char *line, const char *eol, @@ -378,8 +391,19 @@ static int pcre1match(struct grep_pat *p, const char *line, const char *eol, if (eflags & REG_NOTBOL) flags |= PCRE_NOTBOL; - ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line, eol - line, - 0, flags, ovector, ARRAY_SIZE(ovector)); +#ifdef PCRE_CONFIG_JIT + if (p->pcre1_jit_on) { + ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line, + eol - line, 0, flags, ovector, + ARRAY_SIZE(ovector), p->pcre1_jit_stack); + } else +#endif + { + ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line, + eol - line, 0, flags, ovector, + ARRAY_SIZE(ovector)); + } + if (ret < 0 && ret != PCRE_ERROR_NOMATCH) die("pcre_exec failed with error code %d", ret); if (ret > 0) { @@ -394,7 +418,15 @@ static int pcre1match(struct grep_pat *p, const char *line, const char *eol, static void free_pcre1_regexp(struct grep_pat *p) { pcre_free(p->pcre1_regexp); - pcre_free(p->pcre1_extra_info); +#ifdef PCRE_CONFIG_JIT + if (p->pcre1_jit_on) { + pcre_free_study(p->pcre1_extra_info); + pcre_jit_stack_free(p->pcre1_jit_stack); + } else +#endif + { + pcre_free(p->pcre1_extra_info); + } pcre_free((void *)p->pcre1_tables); } #else /* !USE_LIBPCRE1 */ diff --git a/grep.h b/grep.h index 38ac82b638..14f47189f9 100644 --- a/grep.h +++ b/grep.h @@ -3,9 +3,13 @@ #include "color.h" #ifdef USE_LIBPCRE1 #include +#ifndef PCRE_STUDY_JIT_COMPILE +#define PCRE_STUDY_JIT_COMPILE 0 +#endif #else typedef int pcre; typedef int pcre_extra; +typedef int pcre_jit_stack; #endif #include "kwset.h" #include "thread-utils.h" @@ -48,7 +52,9 @@ struct grep_pat { regex_t regexp; pcre *pcre1_regexp; pcre_extra *pcre1_extra_info; + pcre_jit_stack *pcre1_jit_stack; const unsigned char *pcre1_tables; + int pcre1_jit_on; kwset_t kws; unsigned fixed:1; unsigned ignore_case:1; From e87de7cab4f4e3a8ebf4cc588d26636c3d828e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 25 May 2017 20:05:26 +0000 Subject: [PATCH 5/8] grep: un-break building with PCRE < 8.32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Amend my change earlier in this series ("grep: add support for the PCRE v1 JIT API", 2017-04-11) to un-break the build on PCRE v1 versions earlier than 8.32. The JIT support was added in version 8.20 released on 2011-10-21, but it wasn't until 8.32 released on 2012-11-30 that the fast code path to use the JIT via pcre_jit_exec() was added[1] (see also [2]). This means that versions 8.20 through 8.31 could still use the JIT, but supporting it on those versions would add to the already verbose macro soup around JIT support it, and I don't expect that the use-case of compiling a brand new git against a 5 year old PCRE is particularly common, and if someone does that they can just get the existing pre-JIT slow codepath. So just take the easy way out and disable the JIT on any version older than 8.32. The reason this change isn't part of the initial change PCRE JIT support is to have a cleaner history showing which parts of the implementation are only used for ancient PCRE versions. This also makes it easier to revert this change if we ever decide to stop supporting those old versions. 1. http://www.pcre.org/original/changelog.txt ("28. Introducing a native interface for JIT. Through this interface, the compiled[...]") 2. https://bugs.exim.org/show_bug.cgi?id=2121 Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 6 +++--- grep.h | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/grep.c b/grep.c index ffe95995ee..19fa67c34c 100644 --- a/grep.c +++ b/grep.c @@ -369,7 +369,7 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) if (!p->pcre1_extra_info && error) die("%s", error); -#ifdef PCRE_CONFIG_JIT +#ifdef GIT_PCRE1_USE_JIT pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on); if (p->pcre1_jit_on == 1) { p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024); @@ -391,7 +391,7 @@ static int pcre1match(struct grep_pat *p, const char *line, const char *eol, if (eflags & REG_NOTBOL) flags |= PCRE_NOTBOL; -#ifdef PCRE_CONFIG_JIT +#ifdef GIT_PCRE1_USE_JIT if (p->pcre1_jit_on) { ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line, eol - line, 0, flags, ovector, @@ -418,7 +418,7 @@ static int pcre1match(struct grep_pat *p, const char *line, const char *eol, static void free_pcre1_regexp(struct grep_pat *p) { pcre_free(p->pcre1_regexp); -#ifdef PCRE_CONFIG_JIT +#ifdef GIT_PCRE1_USE_JIT if (p->pcre1_jit_on) { pcre_free_study(p->pcre1_extra_info); pcre_jit_stack_free(p->pcre1_jit_stack); diff --git a/grep.h b/grep.h index 14f47189f9..3b948d9155 100644 --- a/grep.h +++ b/grep.h @@ -3,6 +3,11 @@ #include "color.h" #ifdef USE_LIBPCRE1 #include +#ifdef PCRE_CONFIG_JIT +#if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32 +#define GIT_PCRE1_USE_JIT +#endif +#endif #ifndef PCRE_STUDY_JIT_COMPILE #define PCRE_STUDY_JIT_COMPILE 0 #endif From c30cf827a8a3d9e85b2459418599ae874b3e4787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 25 May 2017 20:05:27 +0000 Subject: [PATCH 6/8] grep: un-break building with PCRE < 8.20 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Amend my change earlier in this series ("grep: add support for the PCRE v1 JIT API", 2017-04-11) to un-break the build on PCRE v1 versions earlier than 8.20. The 8.20 release was the first release to have JIT & pcre_jit_stack in the headers, so a mock type needs to be provided for it on those releases. Now git should compile with all PCRE versions that it supported before my JIT change. I've tested it as far back as version 7.5 released on 2008-01-10, once I got down to version 7.0 it wouldn't build anymore with GCC 7.1.1, and I couldn't be bothered to anything older than 7.5 as I'm confident that if the build breaks on those older versions it's not because of my JIT change. See the "un-break" change in this series ("grep: un-break building with PCRE < 8.32", 2017-05-10) for why this isn't squashed into the main PCRE JIT commit. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/grep.h b/grep.h index 3b948d9155..ce90969736 100644 --- a/grep.h +++ b/grep.h @@ -11,6 +11,9 @@ #ifndef PCRE_STUDY_JIT_COMPILE #define PCRE_STUDY_JIT_COMPILE 0 #endif +#if PCRE_MAJOR <= 8 && PCRE_MINOR < 20 +typedef int pcre_jit_stack; +#endif #else typedef int pcre; typedef int pcre_extra; From fb95e2e38dd4e8301b3992bacfac4c87b8262181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 1 Jun 2017 18:20:55 +0000 Subject: [PATCH 7/8] grep: un-break building with PCRE >= 8.32 without --enable-jit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Amend my change earlier in this series ("grep: add support for the PCRE v1 JIT API", 2017-04-11) to un-break the build on PCRE v1 versions later than 8.31 compiled without --enable-jit. As explained in that change and a later compatibility change in this series ("grep: un-break building with PCRE < 8.32", 2017-05-10) the pcre_jit_exec() function is a faster path to execute the JIT. Unfortunately there's no compatibility stub for that function compiled into the library if pcre_config(PCRE_CONFIG_JIT, &ret) would return 0, and no macro that can be used to check for it, so the only portable option to support builds without --enable-jit is via a new NO_LIBPCRE1_JIT=UnfortunatelyYes Makefile option[1]. Another option would be to make the JIT opt-in via USE_LIBPCRE1_JIT=YesPlease, after all it's not a default option of PCRE v1. I think it makes more sense to make it opt-out since even though it's not a default option, most packagers of PCRE seem to turn it on by default, with the notable exception of the MinGW package. Make the MinGW platform work by default by changing the build defaults to turn on NO_LIBPCRE1_JIT=UnfortunatelyYes. It is the only platform that turns on USE_LIBPCRE=YesPlease by default, see commit df5218b4c3 ("config.mak.uname: support MSys2", 2016-01-13) for that change. 1. "How do I support pcre1 JIT on all versions?" (https://lists.exim.org/lurker/thread/20170601.103148.10253788.en.html) 2. https://github.com/Alexpux/MINGW-packages/blob/master/mingw-w64-pcre/PKGBUILD (referenced from "Re: PCRE v2 compile error, was Re: What's cooking in git.git (May 2017, #01; Mon, 1)"; ) Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- Makefile | 13 +++++++++++++ config.mak.uname | 1 + grep.h | 2 ++ 3 files changed, 16 insertions(+) diff --git a/Makefile b/Makefile index a79274e5e6..502c07246f 100644 --- a/Makefile +++ b/Makefile @@ -29,6 +29,14 @@ all:: # Perl-compatible regular expressions instead of standard or extended # POSIX regular expressions. # +# When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if the PCRE v1 +# library is compiled without --enable-jit. We will auto-detect +# whether the version of the PCRE v1 library in use has JIT support at +# all, but we unfortunately can't auto-detect whether JIT support +# hasn't been compiled in in an otherwise JIT-supporting version. If +# you have link-time errors about a missing `pcre_jit_exec` define +# this, or recompile PCRE v1 with --enable-jit. +# # Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in # /foo/bar/include and /foo/bar/lib directories. # @@ -1094,6 +1102,10 @@ ifdef USE_LIBPCRE EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) endif EXTLIBS += -lpcre + +ifdef NO_LIBPCRE1_JIT + BASIC_CFLAGS += -DNO_LIBPCRE1_JIT +endif endif ifdef HAVE_ALLOCA_H @@ -2241,6 +2253,7 @@ GIT-BUILD-OPTIONS: FORCE @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@+ @echo NO_EXPAT=\''$(subst ','\'',$(subst ','\'',$(NO_EXPAT)))'\' >>$@+ @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@+ + @echo NO_LIBPCRE1_JIT=\''$(subst ','\'',$(subst ','\'',$(NO_LIBPCRE1_JIT)))'\' >>$@+ @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+ @echo NO_PTHREADS=\''$(subst ','\'',$(subst ','\'',$(NO_PTHREADS)))'\' >>$@+ @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+ diff --git a/config.mak.uname b/config.mak.uname index 399fe19271..2a577794ba 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -550,6 +550,7 @@ else NO_GETTEXT = USE_GETTEXT_SCHEME = fallthrough USE_LIBPCRE= YesPlease + NO_LIBPCRE1_JIT = UnfortunatelyYes NO_CURL = USE_NED_ALLOCATOR = YesPlease else diff --git a/grep.h b/grep.h index ce90969736..3bff0870b2 100644 --- a/grep.h +++ b/grep.h @@ -5,9 +5,11 @@ #include #ifdef PCRE_CONFIG_JIT #if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32 +#ifndef NO_LIBPCRE1_JIT #define GIT_PCRE1_USE_JIT #endif #endif +#endif #ifndef PCRE_STUDY_JIT_COMPILE #define PCRE_STUDY_JIT_COMPILE 0 #endif From 94da9193a6eb8f1085d611c04ff8bbb4f5ae1e0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 1 Jun 2017 18:20:56 +0000 Subject: [PATCH 8/8] grep: add support for PCRE v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for v2 of the PCRE API. This is a new major version of PCRE that came out in early 2015[1]. The regular expression syntax is the same, but while the API is similar, pretty much every function is either renamed or takes different arguments. Thus using it via entirely new functions makes sense, as opposed to trying to e.g. have one compile_pcre_pattern() that would call either PCRE v1 or v2 functions. Git can now be compiled with either USE_LIBPCRE1=YesPlease or USE_LIBPCRE2=YesPlease, with USE_LIBPCRE=YesPlease currently being a synonym for the former. Providing both is a compile-time error. With earlier patches to enable JIT for PCRE v1 the performance of the release versions of both libraries is almost exactly the same, with PCRE v2 being around 1% slower. However after I reported this to the pcre-dev mailing list[2] I got a lot of help with the API use from Zoltán Herczeg, he subsequently optimized some of the JIT functionality in v2 of the library. Running the p7820-grep-engines.sh performance test against the latest Subversion trunk of both, with both them and git compiled as -O3, and the test run against linux.git, gives the following results. Just the /perl/ tests shown: $ GIT_PERF_REPEAT_COUNT=30 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_COMMAND='grep -q LIBPCRE2 Makefile && make -j8 USE_LIBPCRE2=YesPlease CC=~/perl5/installed/bin/gcc NO_R_TO_GCC_LINKER=YesPlease CFLAGS=-O3 LIBPCREDIR=/home/avar/g/pcre2/inst LDFLAGS=-Wl,-rpath,/home/avar/g/pcre2/inst/lib || make -j8 USE_LIBPCRE=YesPlease CC=~/perl5/installed/bin/gcc NO_R_TO_GCC_LINKER=YesPlease CFLAGS=-O3 LIBPCREDIR=/home/avar/g/pcre/inst LDFLAGS=-Wl,-rpath,/home/avar/g/pcre/inst/lib' ./run HEAD~5 HEAD~ HEAD p7820-grep-engines.sh [...] Test HEAD~5 HEAD~ HEAD ----------------------------------------------------------------------------------------------------------------- 7820.3: perl grep 'how.to' 0.31(1.10+0.48) 0.21(0.35+0.56) -32.3% 0.21(0.34+0.55) -32.3% 7820.7: perl grep '^how to' 0.56(2.70+0.40) 0.24(0.64+0.52) -57.1% 0.20(0.28+0.60) -64.3% 7820.11: perl grep '[how] to' 0.56(2.66+0.38) 0.29(0.95+0.45) -48.2% 0.23(0.45+0.54) -58.9% 7820.15: perl grep '(e.t[^ ]*|v.ry) rare' 1.02(5.77+0.42) 0.31(1.02+0.54) -69.6% 0.23(0.50+0.54) -77.5% 7820.19: perl grep 'm(ú|u)lt.b(æ|y)te' 0.38(1.57+0.42) 0.27(0.85+0.46) -28.9% 0.21(0.33+0.57) -44.7% See commit ("perf: add a comparison test of grep regex engines", 2017-04-19) for details on the machine the above test run was executed on. Here HEAD~2 is git with PCRE v1 without JIT, HEAD~ is PCRE v1 with JIT, and HEAD is PCRE v2 (also with JIT). See previous commits of mine mentioning p7820-grep-engines.sh for more details on the test setup. For ease of readability, a different run just of HEAD~ (PCRE v1 with JIT v.s. PCRE v2), again with just the /perl/ tests shown: [...] Test HEAD~ HEAD ---------------------------------------------------------------------------------------- 7820.3: perl grep 'how.to' 0.21(0.42+0.52) 0.21(0.31+0.58) +0.0% 7820.7: perl grep '^how to' 0.25(0.65+0.50) 0.20(0.31+0.57) -20.0% 7820.11: perl grep '[how] to' 0.30(0.90+0.50) 0.23(0.46+0.53) -23.3% 7820.15: perl grep '(e.t[^ ]*|v.ry) rare' 0.30(1.19+0.38) 0.23(0.51+0.51) -23.3% 7820.19: perl grep 'm(ú|u)lt.b(æ|y)te' 0.27(0.84+0.48) 0.21(0.34+0.57) -22.2% I.e. the two are either neck-to-neck, but PCRE v2 usually pulls ahead, when it does it's around 20% faster. A brief note on thread safety: As noted in pcre2api(3) & pcre2jit(3) the compiled pattern can be shared between threads, but not some of the JIT context, however the grep threading support does all pattern & JIT compilation in separate threads, so this code doesn't need to concern itself with thread safety. See commit 63e7e9d8b6 ("git-grep: Learn PCRE", 2011-05-09) for the initial addition of PCRE v1. This change follows some of the same patterns it did (and which were discussed on list at the time), e.g. mocking up types with typedef instead of ifdef-ing them out when USE_LIBPCRE2 isn't defined. This adds some trivial memory use to the program, but makes the code look nicer. 1. https://lists.exim.org/lurker/message/20150105.162835.0666407a.en.html 2. https://lists.exim.org/lurker/thread/20170419.172322.833ee099.en.html Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- Makefile | 36 ++++++++++--- configure.ac | 79 ++++++++++++++++++++++----- grep.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++ grep.h | 17 ++++++ t/test-lib.sh | 2 +- 5 files changed, 257 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 502c07246f..96ebbed87e 100644 --- a/Makefile +++ b/Makefile @@ -29,6 +29,11 @@ all:: # Perl-compatible regular expressions instead of standard or extended # POSIX regular expressions. # +# Currently USE_LIBPCRE is a synonym for USE_LIBPCRE1, define +# USE_LIBPCRE2 instead if you'd like to use version 2 of the PCRE +# library. The USE_LIBPCRE flag will likely be changed to mean v2 by +# default in future releases. +# # When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if the PCRE v1 # library is compiled without --enable-jit. We will auto-detect # whether the version of the PCRE v1 library in use has JIT support at @@ -37,8 +42,10 @@ all:: # you have link-time errors about a missing `pcre_jit_exec` define # this, or recompile PCRE v1 with --enable-jit. # -# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in -# /foo/bar/include and /foo/bar/lib directories. +# Define LIBPCREDIR=/foo/bar if your PCRE header and library files are +# in /foo/bar/include and /foo/bar/lib directories. Which version of +# PCRE this points to determined by the USE_LIBPCRE1 and USE_LIBPCRE2 +# variables. # # Define HAVE_ALLOCA_H if you have working alloca(3) defined in that header. # @@ -1095,12 +1102,14 @@ ifdef NO_LIBGEN_H COMPAT_OBJS += compat/basename.o endif -ifdef USE_LIBPCRE - BASIC_CFLAGS += -DUSE_LIBPCRE1 - ifdef LIBPCREDIR - BASIC_CFLAGS += -I$(LIBPCREDIR)/include - EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) +USE_LIBPCRE1 ?= $(USE_LIBPCRE) + +ifneq (,$(USE_LIBPCRE1)) + ifdef USE_LIBPCRE2 +$(error Only set USE_LIBPCRE1 (or its alias USE_LIBPCRE) or USE_LIBPCRE2, not both!) endif + + BASIC_CFLAGS += -DUSE_LIBPCRE1 EXTLIBS += -lpcre ifdef NO_LIBPCRE1_JIT @@ -1108,6 +1117,16 @@ ifdef NO_LIBPCRE1_JIT endif endif +ifdef USE_LIBPCRE2 + BASIC_CFLAGS += -DUSE_LIBPCRE2 + EXTLIBS += -lpcre2-8 +endif + +ifdef LIBPCREDIR + BASIC_CFLAGS += -I$(LIBPCREDIR)/include + EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) +endif + ifdef HAVE_ALLOCA_H BASIC_CFLAGS += -DHAVE_ALLOCA_H endif @@ -2252,7 +2271,8 @@ GIT-BUILD-OPTIONS: FORCE @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@+ @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@+ @echo NO_EXPAT=\''$(subst ','\'',$(subst ','\'',$(NO_EXPAT)))'\' >>$@+ - @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@+ + @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE1)))'\' >>$@+ + @echo USE_LIBPCRE2=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE2)))'\' >>$@+ @echo NO_LIBPCRE1_JIT=\''$(subst ','\'',$(subst ','\'',$(NO_LIBPCRE1_JIT)))'\' >>$@+ @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+ @echo NO_PTHREADS=\''$(subst ','\'',$(subst ','\'',$(NO_PTHREADS)))'\' >>$@+ diff --git a/configure.ac b/configure.ac index deeb968daa..11d083fbe0 100644 --- a/configure.ac +++ b/configure.ac @@ -255,21 +255,61 @@ GIT_PARSE_WITH([openssl])) # Perl-compatible regular expressions instead of standard or extended # POSIX regular expressions. # -# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in +# Currently USE_LIBPCRE is a synonym for USE_LIBPCRE1, define +# USE_LIBPCRE2 instead if you'd like to use version 2 of the PCRE +# library. The USE_LIBPCRE flag will likely be changed to mean v2 by +# default in future releases. +# +# Define LIBPCREDIR=/foo/bar if your PCRE header and library files are in # /foo/bar/include and /foo/bar/lib directories. # AC_ARG_WITH(libpcre, -AS_HELP_STRING([--with-libpcre],[support Perl-compatible regexes (default is NO)]) -AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]), +AS_HELP_STRING([--with-libpcre],[synonym for --with-libpcre1]), if test "$withval" = "no"; then - USE_LIBPCRE= + USE_LIBPCRE1= elif test "$withval" = "yes"; then - USE_LIBPCRE=YesPlease + USE_LIBPCRE1=YesPlease else - USE_LIBPCRE=YesPlease + USE_LIBPCRE1=YesPlease LIBPCREDIR=$withval AC_MSG_NOTICE([Setting LIBPCREDIR to $LIBPCREDIR]) - dnl USE_LIBPCRE can still be modified below, so don't substitute + dnl USE_LIBPCRE1 can still be modified below, so don't substitute + dnl it yet. + GIT_CONF_SUBST([LIBPCREDIR]) + fi) + +AC_ARG_WITH(libpcre1, +AS_HELP_STRING([--with-libpcre1],[support Perl-compatible regexes via libpcre1 (default is NO)]) +AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]), + if test "$withval" = "no"; then + USE_LIBPCRE1= + elif test "$withval" = "yes"; then + USE_LIBPCRE1=YesPlease + else + USE_LIBPCRE1=YesPlease + LIBPCREDIR=$withval + AC_MSG_NOTICE([Setting LIBPCREDIR to $LIBPCREDIR]) + dnl USE_LIBPCRE1 can still be modified below, so don't substitute + dnl it yet. + GIT_CONF_SUBST([LIBPCREDIR]) + fi) + +AC_ARG_WITH(libpcre2, +AS_HELP_STRING([--with-libpcre2],[support Perl-compatible regexes via libpcre2 (default is NO)]) +AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]), + if test -n "$USE_LIBPCRE1"; then + AC_MSG_ERROR([Only supply one of --with-libpcre1 or --with-libpcre2!]) + fi + + if test "$withval" = "no"; then + USE_LIBPCRE2= + elif test "$withval" = "yes"; then + USE_LIBPCRE2=YesPlease + else + USE_LIBPCRE2=YesPlease + LIBPCREDIR=$withval + AC_MSG_NOTICE([Setting LIBPCREDIR to $LIBPCREDIR]) + dnl USE_LIBPCRE2 can still be modified below, so don't substitute dnl it yet. GIT_CONF_SUBST([LIBPCREDIR]) fi) @@ -501,13 +541,11 @@ GIT_CONF_SUBST([NEEDS_SSL_WITH_CRYPTO]) GIT_CONF_SUBST([NO_OPENSSL]) # -# Define USE_LIBPCRE if you have and want to use libpcre. Various -# commands such as log and grep offer runtime options to use -# Perl-compatible regular expressions instead of standard or extended -# POSIX regular expressions. +# Handle the USE_LIBPCRE1 and USE_LIBPCRE2 options potentially set +# above. # -if test -n "$USE_LIBPCRE"; then +if test -n "$USE_LIBPCRE1"; then GIT_STASH_FLAGS($LIBPCREDIR) @@ -517,7 +555,22 @@ AC_CHECK_LIB([pcre], [pcre_version], GIT_UNSTASH_FLAGS($LIBPCREDIR) -GIT_CONF_SUBST([USE_LIBPCRE]) +GIT_CONF_SUBST([USE_LIBPCRE1]) + +fi + + +if test -n "$USE_LIBPCRE2"; then + +GIT_STASH_FLAGS($LIBPCREDIR) + +AC_CHECK_LIB([pcre2-8], [pcre2_config_8], +[USE_LIBPCRE2=YesPlease], +[USE_LIBPCRE2=]) + +GIT_UNSTASH_FLAGS($LIBPCREDIR) + +GIT_CONF_SUBST([USE_LIBPCRE2]) fi diff --git a/grep.c b/grep.c index 19fa67c34c..d0bf37858a 100644 --- a/grep.c +++ b/grep.c @@ -179,22 +179,37 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st case GREP_PATTERN_TYPE_BRE: opt->fixed = 0; opt->pcre1 = 0; + opt->pcre2 = 0; break; case GREP_PATTERN_TYPE_ERE: opt->fixed = 0; opt->pcre1 = 0; + opt->pcre2 = 0; opt->regflags |= REG_EXTENDED; break; case GREP_PATTERN_TYPE_FIXED: opt->fixed = 1; opt->pcre1 = 0; + opt->pcre2 = 0; break; case GREP_PATTERN_TYPE_PCRE: opt->fixed = 0; +#ifdef USE_LIBPCRE2 + opt->pcre1 = 0; + opt->pcre2 = 1; +#else + /* + * It's important that pcre1 always be assigned to + * even when there's no USE_LIBPCRE* defined. We still + * call the PCRE stub function, it just dies with + * "cannot use Perl-compatible regexes[...]". + */ opt->pcre1 = 1; + opt->pcre2 = 0; +#endif break; } } @@ -446,6 +461,127 @@ static void free_pcre1_regexp(struct grep_pat *p) } #endif /* !USE_LIBPCRE1 */ +#ifdef USE_LIBPCRE2 +static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) +{ + int error; + PCRE2_UCHAR errbuf[256]; + PCRE2_SIZE erroffset; + int options = PCRE2_MULTILINE; + const uint8_t *character_tables = NULL; + int jitret; + + assert(opt->pcre2); + + p->pcre2_compile_context = NULL; + + if (opt->ignore_case) { + if (has_non_ascii(p->pattern)) { + character_tables = pcre2_maketables(NULL); + p->pcre2_compile_context = pcre2_compile_context_create(NULL); + pcre2_set_character_tables(p->pcre2_compile_context, character_tables); + } + options |= PCRE2_CASELESS; + } + if (is_utf8_locale() && has_non_ascii(p->pattern)) + options |= PCRE2_UTF; + + p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern, + p->patternlen, options, &error, &erroffset, + p->pcre2_compile_context); + + if (p->pcre2_pattern) { + p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL); + if (!p->pcre2_match_data) + die("Couldn't allocate PCRE2 match data"); + } else { + pcre2_get_error_message(error, errbuf, sizeof(errbuf)); + compile_regexp_failed(p, (const char *)&errbuf); + } + + pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on); + if (p->pcre2_jit_on == 1) { + jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE); + if (jitret) + die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret); + p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL); + if (!p->pcre2_jit_stack) + die("Couldn't allocate PCRE2 JIT stack"); + p->pcre2_match_context = pcre2_match_context_create(NULL); + if (!p->pcre2_jit_stack) + die("Couldn't allocate PCRE2 match context"); + pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack); + } else if (p->pcre2_jit_on != 0) { + die("BUG: The pcre2_jit_on variable should be 0 or 1, not %d", + p->pcre1_jit_on); + } +} + +static int pcre2match(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + int ret, flags = 0; + PCRE2_SIZE *ovector; + PCRE2_UCHAR errbuf[256]; + + if (eflags & REG_NOTBOL) + flags |= PCRE2_NOTBOL; + + if (p->pcre2_jit_on) + ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line, + eol - line, 0, flags, p->pcre2_match_data, + NULL); + else + ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line, + eol - line, 0, flags, p->pcre2_match_data, + NULL); + + if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) { + pcre2_get_error_message(ret, errbuf, sizeof(errbuf)); + die("%s failed with error code %d: %s", + (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret, + errbuf); + } + if (ret > 0) { + ovector = pcre2_get_ovector_pointer(p->pcre2_match_data); + ret = 0; + match->rm_so = (int)ovector[0]; + match->rm_eo = (int)ovector[1]; + } + + return ret; +} + +static void free_pcre2_pattern(struct grep_pat *p) +{ + pcre2_compile_context_free(p->pcre2_compile_context); + pcre2_code_free(p->pcre2_pattern); + pcre2_match_data_free(p->pcre2_match_data); + pcre2_jit_stack_free(p->pcre2_jit_stack); + pcre2_match_context_free(p->pcre2_match_context); +} +#else /* !USE_LIBPCRE2 */ +static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) +{ + /* + * Unreachable until USE_LIBPCRE2 becomes synonymous with + * USE_LIBPCRE. See the sibling comment in + * grep_set_pattern_type_option(). + */ + die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); +} + +static int pcre2match(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + return 1; +} + +static void free_pcre2_pattern(struct grep_pat *p) +{ +} +#endif /* !USE_LIBPCRE2 */ + static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) { struct strbuf sb = STRBUF_INIT; @@ -511,6 +647,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) return; } + if (opt->pcre2) { + compile_pcre2_pattern(p, opt); + return; + } + if (opt->pcre1) { compile_pcre1_regexp(p, opt); return; @@ -870,6 +1011,8 @@ void free_grep_patterns(struct grep_opt *opt) kwsfree(p->kws); else if (p->pcre1_regexp) free_pcre1_regexp(p); + else if (p->pcre2_pattern) + free_pcre2_pattern(p); else regfree(&p->regexp); free(p->pattern); @@ -950,6 +1093,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, hit = !fixmatch(p, line, eol, match); else if (p->pcre1_regexp) hit = !pcre1match(p, line, eol, match, eflags); + else if (p->pcre2_pattern) + hit = !pcre2match(p, line, eol, match, eflags); else hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, eflags); diff --git a/grep.h b/grep.h index 3bff0870b2..6f3d4e1954 100644 --- a/grep.h +++ b/grep.h @@ -21,6 +21,16 @@ typedef int pcre; typedef int pcre_extra; typedef int pcre_jit_stack; #endif +#ifdef USE_LIBPCRE2 +#define PCRE2_CODE_UNIT_WIDTH 8 +#include +#else +typedef int pcre2_code; +typedef int pcre2_match_data; +typedef int pcre2_compile_context; +typedef int pcre2_match_context; +typedef int pcre2_jit_stack; +#endif #include "kwset.h" #include "thread-utils.h" #include "userdiff.h" @@ -65,6 +75,12 @@ struct grep_pat { pcre_jit_stack *pcre1_jit_stack; const unsigned char *pcre1_tables; int pcre1_jit_on; + pcre2_code *pcre2_pattern; + pcre2_match_data *pcre2_match_data; + pcre2_compile_context *pcre2_compile_context; + pcre2_match_context *pcre2_match_context; + pcre2_jit_stack *pcre2_jit_stack; + uint32_t pcre2_jit_on; kwset_t kws; unsigned fixed:1; unsigned ignore_case:1; @@ -128,6 +144,7 @@ struct grep_opt { int extended; int use_reflog_filter; int pcre1; + int pcre2; int relative; int pathname; int null_following_name; diff --git a/t/test-lib.sh b/t/test-lib.sh index ab92c0ebaa..44d4679384 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1011,7 +1011,7 @@ esac test -z "$NO_PERL" && test_set_prereq PERL test -z "$NO_PTHREADS" && test_set_prereq PTHREADS test -z "$NO_PYTHON" && test_set_prereq PYTHON -test -n "$USE_LIBPCRE1" && test_set_prereq PCRE +test -n "$USE_LIBPCRE1$USE_LIBPCRE2" && test_set_prereq PCRE test -z "$NO_GETTEXT" && test_set_prereq GETTEXT # Can we rely on git's output in the C locale?