From d2355d76150316104b89443065e62a71342c36be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Thu, 5 May 2011 00:00:17 +0200 Subject: [PATCH 01/14] Documentation: Add --line-number to git-grep synopsis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7d6cb10b ("grep: Add the option '--line-number'", 2011-03-28) introduced the --line-number option and added its description to OPTIONS section, but forgot to update SYNOPSIS. Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- Documentation/git-grep.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index d7523b3e45..4a5837881d 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -12,7 +12,7 @@ SYNOPSIS 'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp] [-v | --invert-match] [-h|-H] [--full-name] [-E | --extended-regexp] [-G | --basic-regexp] - [-F | --fixed-strings] [-n] + [-F | --fixed-strings] [-n | --line-number] [-l | --files-with-matches] [-L | --files-without-match] [(-O | --open-files-in-pager) []] [-z | --null] From 5a69eaf5541b8449ede74f148d395abd0acbf20f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Thu, 5 May 2011 00:00:18 +0200 Subject: [PATCH 02/14] contrib/completion: --line-number to git grep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "-n" option of "git grep" gained a synonym "--line-number" with commit 7d6cb10b ("grep: Add the option '--line-number'", 2011-03-28). Teach bash-completion about it. 
Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 840ae38760..3dc9cbe9f9 100755 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -1485,7 +1485,7 @@ _git_grep () __gitcomp " --cached --text --ignore-case --word-regexp --invert-match - --full-name + --full-name --line-number --extended-regexp --basic-regexp --fixed-strings --files-with-matches --name-only --files-without-match From 97e777842260a5339bb5272a35ebeaeaae554937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Thu, 5 May 2011 00:00:19 +0200 Subject: [PATCH 03/14] grep: Put calls to fixmatch() and regmatch() into patmatch() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both match_one_pattern() and look_ahead() use fixmatch() and regmatch() in the same way. They really want to match a pattern against a string, but now they need to know if the pattern is fixed or regexp. This change cleans this up by introducing patmatch() (from "pattern match") and also simplifies inserting other ways of matching a string. 
Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- grep.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/grep.c b/grep.c index 63c4280cac..d67baf9566 100644 --- a/grep.c +++ b/grep.c @@ -412,6 +412,19 @@ static int regmatch(const regex_t *preg, char *line, char *eol, return regexec(preg, line, 1, match, eflags); } +static int patmatch(struct grep_pat *p, char *line, char *eol, + regmatch_t *match, int eflags) +{ + int hit; + + if (p->fixed) + hit = !fixmatch(p, line, eol, match); + else + hit = !regmatch(&p->regexp, line, eol, match, eflags); + + return hit; +} + static int strip_timestamp(char *bol, char **eol_p) { char *eol = *eol_p; @@ -461,10 +474,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, } again: - if (p->fixed) - hit = !fixmatch(p, bol, eol, pmatch); - else - hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags); + hit = patmatch(p, bol, eol, pmatch, eflags); if (hit && p->word_regexp) { if ((pmatch[0].rm_so < 0) || @@ -791,10 +801,7 @@ static int look_ahead(struct grep_opt *opt, int hit; regmatch_t m; - if (p->fixed) - hit = !fixmatch(p, bol, bol + *left_p, &m); - else - hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0); + hit = patmatch(p, bol, bol + *left_p, &m, 0); if (!hit || m.rm_so < 0 || m.rm_eo < 0) continue; if (earliest < 0 || m.rm_so < earliest) From 8997da3820a0f55d156f43f3bb71856580df160d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Mon, 9 May 2011 23:52:03 +0200 Subject: [PATCH 04/14] grep: Fix a typo in a comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grep.c b/grep.c index d67baf9566..250462e0c0 100644 --- a/grep.c +++ b/grep.c @@ -898,7 +898,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, int hit; /* - * 
look_ahead() skips quicly to the line that possibly + * look_ahead() skips quickly to the line that possibly * has the next hit; don't call it if we need to do * something more than just skipping the current line * in response to an unmatch for the current line. E.g. From a30c148aa7ec6583dbdb38fa6601df3cf4f5a660 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Mon, 9 May 2011 23:52:04 +0200 Subject: [PATCH 05/14] grep: Extract compile_regexp_failed() from compile_regexp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This simplifies compile_regexp() a little and allows re-using error handling code. Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- grep.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/grep.c b/grep.c index 250462e0c0..870d10cf69 100644 --- a/grep.c +++ b/grep.c @@ -59,6 +59,21 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt) return ret; } +static NORETURN void compile_regexp_failed(const struct grep_pat *p, + const char *error) +{ + char where[1024]; + + if (p->no) + sprintf(where, "In '%s' at %d, ", p->origin, p->no); + else if (p->origin) + sprintf(where, "%s, ", p->origin); + else + where[0] = 0; + + die("%s'%s': %s", where, p->pattern, error); +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; @@ -73,17 +88,9 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { char errbuf[1024]; - char where[1024]; - if (p->no) - sprintf(where, "In '%s' at %d, ", - p->origin, p->no); - else if (p->origin) - sprintf(where, "%s, ", p->origin); - else - where[0] = 0; regerror(err, &p->regexp, errbuf, 1024); regfree(&p->regexp); - die("%s'%s': %s", where, p->pattern, errbuf); + compile_regexp_failed(p, errbuf); } } From 63e7e9d8b6483fed555ebed1c79a4820b2ba2558 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Mon, 9 May 2011 23:52:05 +0200 Subject: [PATCH 06/14] git-grep: Learn PCRE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch teaches git-grep the --perl-regexp/-P options (naming borrowed from GNU grep) in order to allow specifying PCRE regexes on the command line. PCRE has a number of features which make them more handy to use than POSIX regexes, like consistent escaping rules, extended character classes, ungreedy matching etc. git isn't built with PCRE support automatically. The USE_LIBPCRE environment variable must be enabled (like `make USE_LIBPCRE=YesPlease`). Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- Documentation/git-grep.txt | 6 +++ Makefile | 15 ++++++ builtin/grep.c | 2 + contrib/completion/git-completion.bash | 1 + grep.c | 75 +++++++++++++++++++++++++- grep.h | 9 ++++ 6 files changed, 107 insertions(+), 1 deletion(-) diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index 4a5837881d..e150c77cff 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -12,6 +12,7 @@ SYNOPSIS 'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp] [-v | --invert-match] [-h|-H] [--full-name] [-E | --extended-regexp] [-G | --basic-regexp] + [-P | --perl-regexp] [-F | --fixed-strings] [-n | --line-number] [-l | --files-with-matches] [-L | --files-without-match] [(-O | --open-files-in-pager) []] @@ -97,6 +98,11 @@ OPTIONS Use POSIX extended/basic regexp for patterns. Default is to use basic regexp. +-P:: +--perl-regexp:: + Use Perl-compatible regexp for patterns. Requires libpcre to be + compiled in. + -F:: --fixed-strings:: Use fixed strings for patterns (don't interpret pattern diff --git a/Makefile b/Makefile index cbc3fce2d5..fea55c04dd 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,12 @@ all:: # Define NO_OPENSSL environment variable if you do not have OpenSSL. # This also implies BLK_SHA1. 
# +# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be +# able to use Perl-compatible regular expressions. +# +# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in +# /foo/bar/include and /foo/bar/lib directories. +# # Define NO_CURL if you do not have libcurl installed. git-http-pull and # git-http-push are not built, and you cannot use http:// and https:// # transports. @@ -1248,6 +1254,15 @@ ifdef NO_LIBGEN_H COMPAT_OBJS += compat/basename.o endif +ifdef USE_LIBPCRE + BASIC_CFLAGS += -DUSE_LIBPCRE + ifdef LIBPCREDIR + BASIC_CFLAGS += -I$(LIBPCREDIR)/include + EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) + endif + EXTLIBS += -lpcre +endif + ifdef NO_CURL BASIC_CFLAGS += -DNO_CURL REMOTE_CURL_PRIMARY = diff --git a/builtin/grep.c b/builtin/grep.c index 10a1f65310..6831975104 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -781,6 +781,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) REG_EXTENDED), OPT_BOOLEAN('F', "fixed-strings", &opt.fixed, "interpret patterns as fixed strings"), + OPT_BOOLEAN('P', "perl-regexp", &opt.pcre, + "use Perl-compatible regular expressions"), OPT_GROUP(""), OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"), OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1), diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 3dc9cbe9f9..2facd08d16 100755 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -1487,6 +1487,7 @@ _git_grep () --text --ignore-case --word-regexp --invert-match --full-name --line-number --extended-regexp --basic-regexp --fixed-strings + --perl-regexp --files-with-matches --name-only --files-without-match --max-depth diff --git a/grep.c b/grep.c index 870d10cf69..d03d9e24c2 100644 --- a/grep.c +++ b/grep.c @@ -74,6 +74,69 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p, die("%s'%s': %s", where, 
p->pattern, error); } +#ifdef USE_LIBPCRE +static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) +{ + const char *error; + int erroffset; + int options = 0; + + if (opt->ignore_case) + options |= PCRE_CASELESS; + + p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, + NULL); + if (!p->pcre_regexp) + compile_regexp_failed(p, error); + + p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error); + if (!p->pcre_extra_info && error) + die("%s", error); +} + +static int pcrematch(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + int ovector[30], ret, flags = 0; + + if (eflags & REG_NOTBOL) + flags |= PCRE_NOTBOL; + + ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line, + 0, flags, ovector, ARRAY_SIZE(ovector)); + if (ret < 0 && ret != PCRE_ERROR_NOMATCH) + die("pcre_exec failed with error code %d", ret); + if (ret > 0) { + ret = 0; + match->rm_so = ovector[0]; + match->rm_eo = ovector[1]; + } + + return ret; +} + +static void free_pcre_regexp(struct grep_pat *p) +{ + pcre_free(p->pcre_regexp); + pcre_free(p->pcre_extra_info); +} +#else /* !USE_LIBPCRE */ +static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) +{ + die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); +} + +static int pcrematch(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + return 1; +} + +static void free_pcre_regexp(struct grep_pat *p) +{ +} +#endif /* !USE_LIBPCRE */ + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; @@ -85,6 +148,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (p->fixed) return; + if (opt->pcre) { + compile_pcre_regexp(p, opt); + return; + } + err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { char errbuf[1024]; @@ -327,7 +395,10 @@ void free_grep_patterns(struct grep_opt *opt) case GREP_PATTERN: /* atom */ case 
GREP_PATTERN_HEAD: case GREP_PATTERN_BODY: - regfree(&p->regexp); + if (p->pcre_regexp) + free_pcre_regexp(p); + else + regfree(&p->regexp); break; default: break; @@ -426,6 +497,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, if (p->fixed) hit = !fixmatch(p, line, eol, match); + else if (p->pcre_regexp) + hit = !pcrematch(p, line, eol, match, eflags); else hit = !regmatch(&p->regexp, line, eol, match, eflags); diff --git a/grep.h b/grep.h index 06621fe663..cd055cdfa8 100644 --- a/grep.h +++ b/grep.h @@ -1,6 +1,12 @@ #ifndef GREP_H #define GREP_H #include "color.h" +#ifdef USE_LIBPCRE +#include +#else +typedef int pcre; +typedef int pcre_extra; +#endif enum grep_pat_token { GREP_PATTERN, @@ -33,6 +39,8 @@ struct grep_pat { size_t patternlen; enum grep_header_field field; regex_t regexp; + pcre *pcre_regexp; + pcre_extra *pcre_extra_info; unsigned fixed:1; unsigned ignore_case:1; unsigned word_regexp:1; @@ -83,6 +91,7 @@ struct grep_opt { #define GREP_BINARY_TEXT 2 int binary; int extended; + int pcre; int relative; int pathname; int null_following_name; From a119f91e57f23f89b1f9170613d517d62a91c97d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Mon, 9 May 2011 23:52:06 +0200 Subject: [PATCH 07/14] configure: Check for libpcre MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds checks for libpcre to configure. By default libpcre is disabled, --with-libpcre enables it (if it works). 
Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- config.mak.in | 1 + configure.ac | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/config.mak.in b/config.mak.in index e378534cbd..f30130b17a 100644 --- a/config.mak.in +++ b/config.mak.in @@ -61,6 +61,7 @@ NO_INET_PTON=@NO_INET_PTON@ NO_ICONV=@NO_ICONV@ OLD_ICONV=@OLD_ICONV@ NO_REGEX=@NO_REGEX@ +USE_LIBPCRE=@USE_LIBPCRE@ NO_DEFLATE_BOUND=@NO_DEFLATE_BOUND@ INLINE=@INLINE@ SOCKLEN_T=@SOCKLEN_T@ diff --git a/configure.ac b/configure.ac index fafd81557c..048a1d4972 100644 --- a/configure.ac +++ b/configure.ac @@ -220,6 +220,27 @@ AS_HELP_STRING([--with-openssl],[use OpenSSL library (default is YES)]) AS_HELP_STRING([], [ARG can be prefix for openssl library and headers]),\ GIT_PARSE_WITH(openssl)) # +# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be +# able to use Perl-compatible regular expressions. +# +# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in +# /foo/bar/include and /foo/bar/lib directories. +# +AC_ARG_WITH(libpcre, +AS_HELP_STRING([--with-libpcre],[support Perl-compatible regexes (default is NO)]) +AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]), +if test "$withval" = "no"; then \ + USE_LIBPCRE=; \ +elif test "$withval" = "yes"; then \ + USE_LIBPCRE=YesPlease; \ +else + USE_LIBPCRE=YesPlease; \ + LIBPCREDIR=$withval; \ + AC_MSG_NOTICE([Setting LIBPCREDIR to $withval]); \ + GIT_CONF_APPEND_LINE(LIBPCREDIR=$withval); \ +fi \ +) +# # Define NO_CURL if you do not have curl installed. git-http-pull and # git-http-push are not built, and you cannot use http:// and https:// # transports. @@ -434,6 +455,25 @@ GIT_UNSTASH_FLAGS($OPENSSLDIR) AC_SUBST(NEEDS_SSL_WITH_CRYPTO) AC_SUBST(NO_OPENSSL) +# +# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be +# able to use Perl-compatible regular expressions. 
+# + +if test -n "$USE_LIBPCRE"; then + +GIT_STASH_FLAGS($LIBPCREDIR) + +AC_CHECK_LIB([pcre], [pcre_version], +[USE_LIBPCRE=YesPlease], +[USE_LIBPCRE=]) + +GIT_UNSTASH_FLAGS($LIBPCREDIR) + +AC_SUBST(USE_LIBPCRE) + +fi + # # Define NO_CURL if you do not have libcurl installed. git-http-pull and # git-http-push are not built, and you cannot use http:// and https:// From 8f852ce613650b0cccf02adecbc18865d8e21fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Mon, 9 May 2011 23:52:07 +0200 Subject: [PATCH 08/14] grep: Add basic tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This modest patch adds simple tests for git grep -P/--perl-regexp and its interoperation with -i and -w. Tests are only enabled when prerequisite LIBPCRE is defined (it's automatically set based on USE_LIBPCRE in test-lib.sh). Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- t/README | 5 +++++ t/t7810-grep.sh | 38 ++++++++++++++++++++++++++++++++++++++ t/test-lib.sh | 1 + 3 files changed, 44 insertions(+) diff --git a/t/README b/t/README index 428ee05c4a..238729c5b7 100644 --- a/t/README +++ b/t/README @@ -587,6 +587,11 @@ use these, and "test_set_prereq" for how to define your own. Test is not run by root user, and an attempt to write to an unwritable file is expected to fail correctly. + - LIBPCRE + + Git was compiled with USE_LIBPCRE=YesPlease. Wrap any tests + that use git-grep --perl-regexp or git-grep -P in these. 
+ Tips for Writing Tests ---------------------- diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 8184c264cf..e845218f67 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -26,6 +26,12 @@ test_expect_success setup ' echo foo mmap bar_mmap echo foo_mmap bar mmap baz } >file && + { + echo Hello world + echo HeLLo world + echo Hello_world + echo HeLLo_world + } >hello_world && echo vvv >v && echo ww w >w && echo x x xx x >x && @@ -599,4 +605,36 @@ test_expect_success 'grep -e -- -- path' ' test_cmp expected actual ' +cat >expected <actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P pattern' ' + git grep -P "\p{Ps}.*?\p{Pe}" hello.c >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P -i pattern' ' + { + echo "hello.c: printf(\"Hello world.\n\");" + } >expected && + git grep -P -i "PRINTF\([^\d]+\)" hello.c >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P -w pattern' ' + { + echo "hello_world:Hello world" + echo "hello_world:HeLLo world" + } >expected && + git grep -P -w "He((?i)ll)o" hello_world >actual && + test_cmp expected actual +' + test_done diff --git a/t/test-lib.sh b/t/test-lib.sh index abc47f3abc..d3ed59803f 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1067,6 +1067,7 @@ esac test -z "$NO_PERL" && test_set_prereq PERL test -z "$NO_PYTHON" && test_set_prereq PYTHON +test -n "$USE_LIBPCRE" && test_set_prereq LIBPCRE # Can we rely on git's output in the C locale? if test -n "$GETTEXT_POISON" From 258a6188496fe5131203905b6cd596af69312247 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Mon, 9 May 2011 23:52:08 +0200 Subject: [PATCH 09/14] git-grep: Bail out when -P is used with -F or -E MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch makes git-grep die() when -P is used on command line together with -E/--extended-regexp or -F/--fixed-strings. 
This also makes it bail out when grep.extendedRegexp is enabled. But `git grep -G -P pattern` and `git grep -E -G -P pattern` still work because -G and -E set opts.regflags during parse_options() and there is no way to detect `-G` or `-E -G`. Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- builtin/grep.c | 4 +++- t/t7810-grep.sh | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/builtin/grep.c b/builtin/grep.c index 6831975104..8f2602653e 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -925,9 +925,11 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.pattern_list) die(_("no pattern given.")); + if (opt.regflags != REG_NEWLINE && opt.pcre) + die(_("cannot mix --extended-regexp and --perl-regexp")); if (!opt.fixed && opt.ignore_case) opt.regflags |= REG_ICASE; - if ((opt.regflags != REG_NEWLINE) && opt.fixed) + if ((opt.regflags != REG_NEWLINE || opt.pcre) && opt.fixed) die(_("cannot mix --fixed-strings and regexp")); #ifndef NO_PTHREADS diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index e845218f67..2a31eca5f2 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -637,4 +637,20 @@ test_expect_success LIBPCRE 'grep -P -w pattern' ' test_cmp expected actual ' +test_expect_success LIBPCRE 'grep -P -F returns error' ' + test_expect_code 128 git grep -P -F main +' + +test_expect_success LIBPCRE 'grep -P -E returns error' ' + test_expect_code 128 git grep -P -E main +' + +test_expect_failure LIBPCRE 'grep -P -G returns error' ' + test_expect_code 128 git grep -P -G main +' + +test_expect_failure LIBPCRE 'grep -P -E -G returns error' ' + test_expect_code 128 git grep -P -E -G main +' + test_done From cca2c172e0c37eff8e743b63016b6bc604b38a7d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 9 May 2011 18:48:36 -0700 Subject: [PATCH 10/14] git-grep: do not die upon -F/-P when grep.extendedRegexp is set. The previous one made "git grep -P" fail when grep.extendedRegexp is enabled. 
That is a non-starter. The option on the command line should just make the command ignore the configured default. The handling of "-F" in the existing code has the same problem. Instead of saying -G/-F/-E/-P are incompatible with each other, just let the last one win. That way, you can have "[alias] gr = grep -P" and use Pcre for everyday work e.g. "git gr ':i?foo'", and append -G to the aliased command line to override it e.g. "git gr -G '[Ff][Oo][Oo]'". Signed-off-by: Junio C Hamano --- builtin/grep.c | 56 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 8f2602653e..298f763b70 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -753,6 +753,15 @@ int cmd_grep(int argc, const char **argv, const char *prefix) int i; int dummy; int use_index = 1; + enum { + pattern_type_unspecified = 0, + pattern_type_bre, + pattern_type_ere, + pattern_type_fixed, + pattern_type_pcre, + }; + int pattern_type = pattern_type_unspecified; + struct option options[] = { OPT_BOOLEAN(0, "cached", &cached, "search in index instead of in the work tree"), @@ -774,15 +783,18 @@ int cmd_grep(int argc, const char **argv, const char *prefix) "descend at most levels", PARSE_OPT_NONEG, NULL, 1 }, OPT_GROUP(""), - OPT_BIT('E', "extended-regexp", &opt.regflags, - "use extended POSIX regular expressions", REG_EXTENDED), - OPT_NEGBIT('G', "basic-regexp", &opt.regflags, - "use basic POSIX regular expressions (default)", - REG_EXTENDED), - OPT_BOOLEAN('F', "fixed-strings", &opt.fixed, - "interpret patterns as fixed strings"), - OPT_BOOLEAN('P', "perl-regexp", &opt.pcre, - "use Perl-compatible regular expressions"), + OPT_SET_INT('E', "extended-regexp", &pattern_type, + "use extended POSIX regular expressions", + pattern_type_ere), + OPT_SET_INT('G', "basic-regexp", &pattern_type, + "use basic POSIX regular expressions (default)", + pattern_type_bre), + OPT_SET_INT('F', "fixed-strings", &pattern_type, + 
"interpret patterns as fixed strings", + pattern_type_fixed), + OPT_SET_INT('P', "perl-regexp", &pattern_type, + "use Perl-compatible regular expressions", + pattern_type_pcre), OPT_GROUP(""), OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"), OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1), @@ -888,6 +900,28 @@ int cmd_grep(int argc, const char **argv, const char *prefix) PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_STOP_AT_NON_OPTION | PARSE_OPT_NO_INTERNAL_HELP); + switch (pattern_type) { + case pattern_type_fixed: + opt.fixed = 1; + opt.pcre = 0; + break; + case pattern_type_bre: + opt.fixed = 0; + opt.pcre = 0; + opt.regflags &= ~REG_EXTENDED; + break; + case pattern_type_ere: + opt.fixed = 0; + opt.pcre = 0; + opt.regflags |= REG_EXTENDED; + break; + case pattern_type_pcre: + opt.fixed = 0; + opt.pcre = 1; + break; + default: + break; /* nothing */ + } if (use_index && !startup_info->have_repository) /* die the same way as if we did it at the beginning */ @@ -925,12 +959,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.pattern_list) die(_("no pattern given.")); - if (opt.regflags != REG_NEWLINE && opt.pcre) - die(_("cannot mix --extended-regexp and --perl-regexp")); if (!opt.fixed && opt.ignore_case) opt.regflags |= REG_ICASE; - if ((opt.regflags != REG_NEWLINE || opt.pcre) && opt.fixed) - die(_("cannot mix --fixed-strings and regexp")); #ifndef NO_PTHREADS if (online_cpus() == 1 || !grep_threads_ok(&opt)) From dd0a21ede0998ec790c37865c7dc119814ac7745 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 16 May 2011 00:10:14 -0700 Subject: [PATCH 11/14] git-grep: update tests now regexp type is "last one wins" Signed-off-by: Junio C Hamano --- t/t7810-grep.sh | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 2a31eca5f2..e845218f67 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -637,20 +637,4 @@ test_expect_success LIBPCRE 'grep -P -w pattern' 
' test_cmp expected actual ' -test_expect_success LIBPCRE 'grep -P -F returns error' ' - test_expect_code 128 git grep -P -F main -' - -test_expect_success LIBPCRE 'grep -P -E returns error' ' - test_expect_code 128 git grep -P -E main -' - -test_expect_failure LIBPCRE 'grep -P -G returns error' ' - test_expect_code 128 git grep -P -G main -' - -test_expect_failure LIBPCRE 'grep -P -E -G returns error' ' - test_expect_code 128 git grep -P -E -G main -' - test_done From a80dff25152f43e8a9a688943cb5f67d2ad76fc4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 16 May 2011 00:11:53 -0700 Subject: [PATCH 12/14] Makefile: Pass USE_LIBPCRE down in GIT-BUILD-OPTIONS Otherwise we would fail to rebuild correctly when this option was changed between $(MAKE) invocations, and more importantly, $(MAKE) test would not pass it down and t/test-lib.sh would not set the LIBPCRE prerequisite. Signed-off-by: Junio C Hamano --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index fea55c04dd..c449f5d6de 100644 --- a/Makefile +++ b/Makefile @@ -2109,6 +2109,7 @@ GIT-BUILD-OPTIONS: FORCE @echo PYTHON_PATH=\''$(subst ','\'',$(PYTHON_PATH_SQ))'\' >>$@ @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@ @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ + @echo USE_LIBPCRE=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@ @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@ @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@ ifdef GIT_TEST_CMP From f556e4af2790bc8f0918093bd2b3053c7d2898dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Sun, 22 May 2011 13:37:28 +0200 Subject: [PATCH 13/14] git-grep: Update tests (mainly for -P) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add few more tests for "-P/--perl-regexp" option of "git grep". 
While at it, add some generic tests for the grep.extendedRegexp config option, for detecting an invalid regexp, and for checking whether the "last one wins" rule works for selecting the regexp type. Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- t/t7810-grep.sh | 85 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index e845218f67..e061108a64 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -32,6 +32,11 @@ test_expect_success setup ' echo Hello_world echo HeLLo_world } >hello_world && + { + echo aab + echo a+b + echo a\\+b + } >ab && echo vvv >v && echo ww w >w && echo x x xx x >x && @@ -227,7 +232,17 @@ do git grep --max-depth 0 -n -e vvv $H -- t . >actual && test_cmp expected actual ' + test_expect_success "grep $L with grep.extendedRegexp=false" ' + echo "ab:a+b" >expected && + git -c grep.extendedRegexp=false grep "a+b" >actual && + test_cmp expected actual + ' + test_expect_success "grep $L with grep.extendedRegexp=true" ' + echo "ab:aab" >expected && + git -c grep.extendedRegexp=true grep "a+b" >actual && + test_cmp expected actual + ' done cat >expected <empty && + test_must_fail git -c grep.extendedregexp=true \ + grep "\p{Ps}.*?\p{Pe}" hello.c >actual && + test_cmp empty actual +' + +test_expect_success LIBPCRE 'grep -P pattern with grep.extendedRegexp=true' ' + git -c grep.extendedregexp=true \ + grep -P "\p{Ps}.*?\p{Pe}" hello.c >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -P -v pattern' ' + { + echo ab:a+b + echo ab:a\\+b + } >expected && + git grep -P -v "aab" ab >actual && + test_cmp expected actual +' + test_expect_success LIBPCRE 'grep -P -i pattern' ' { echo "hello.c: printf(\"Hello world.\n\");" @@ -637,4 +674,52 @@ test_expect_success LIBPCRE 'grep -P -w pattern' ' test_cmp expected actual ' +test_expect_success 'grep -G invalidpattern properly dies ' ' + test_must_fail git grep -G "a[" +' + +test_expect_success 'grep -E invalidpattern 
properly dies ' ' + test_must_fail git grep -E "a[" +' + +test_expect_success LIBPCRE 'grep -P invalidpattern properly dies ' ' + test_must_fail git grep -P "a[" +' + +test_expect_success 'grep -F -E -G pattern' ' + echo ab:a+b >expected && + git grep -F -E -G a+b >actual && + test_cmp expected actual +' + +test_expect_success 'grep -F -G -E pattern' ' + echo ab:aab >expected && + git grep -F -G -E a+b >actual && + test_cmp expected actual +' + +test_expect_success 'grep -E -F -G pattern' ' + echo ab:aab >expected && + git grep -E -F -G a\\+b >actual && + test_cmp expected actual +' + +test_expect_success 'grep -E -G -F pattern' ' + echo ab:a\\+b >expected && + git grep -E -G -F a\\+b >actual && + test_cmp expected actual +' + +test_expect_success 'grep -G -F -E pattern' ' + echo ab:a+b >expected && + git grep -G -F -E a\\+b >actual && + test_cmp expected actual +' + +test_expect_success LIBPCRE 'grep -E -G -F -P pattern' ' + echo ab:a+b >expected && + git grep -E -G -F -P a\\+b >actual && + test_cmp expected actual +' + test_done From d0042abe14b3aece87595d365d6eba84c3e53327 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kiedrowicz?= Date: Fri, 27 May 2011 00:43:59 +0200 Subject: [PATCH 14/14] git-grep: Fix problems with recently added tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brian Gernhardt reported that test 'git grep -E -F -G a\\+b' fails on OS X 10.6.7. This is because I assumed \+ is part of BRE, which isn't true on all platforms. The easiest way to make this test pass is to just update expected output, but that would make the test pointless. Its real purpose is to check whether 'git grep -E -F -G' is different from 'git grep -E -G -F'. To check that, let's change pattern to "a+b*c". This should return different match for -G, -F and -E. I also made two small tweaks to the tests. First, I added path "ab" to all calls to future-proof tests. 
Second, I updated last two tests to better show that 'git grep -P -E' is different from 'git grep -E -P'. Signed-off-by: Michał Kiedrowicz Signed-off-by: Junio C Hamano --- t/t7810-grep.sh | 58 ++++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index e061108a64..69bd576d1c 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -33,9 +33,9 @@ test_expect_success setup ' echo HeLLo_world } >hello_world && { - echo aab - echo a+b - echo a\\+b + echo "a+b*c" + echo "a+bc" + echo "abc" } >ab && echo vvv >v && echo ww w >w && @@ -233,14 +233,14 @@ do test_cmp expected actual ' test_expect_success "grep $L with grep.extendedRegexp=false" ' - echo "ab:a+b" >expected && - git -c grep.extendedRegexp=false grep "a+b" >actual && + echo "ab:a+bc" >expected && + git -c grep.extendedRegexp=false grep "a+b*c" ab >actual && test_cmp expected actual ' test_expect_success "grep $L with grep.extendedRegexp=true" ' - echo "ab:aab" >expected && - git -c grep.extendedRegexp=true grep "a+b" >actual && + echo "ab:abc" >expected && + git -c grep.extendedRegexp=true grep "a+b*c" ab >actual && test_cmp expected actual ' done @@ -636,7 +636,7 @@ test_expect_success LIBPCRE 'grep -P pattern' ' ' test_expect_success 'grep pattern with grep.extendedRegexp=true' ' - :>empty && + >empty && test_must_fail git -c grep.extendedregexp=true \ grep "\p{Ps}.*?\p{Pe}" hello.c >actual && test_cmp empty actual @@ -650,10 +650,10 @@ test_expect_success LIBPCRE 'grep -P pattern with grep.extendedRegexp=true' ' test_expect_success LIBPCRE 'grep -P -v pattern' ' { - echo ab:a+b - echo ab:a\\+b + echo "ab:a+b*c" + echo "ab:a+bc" } >expected && - git grep -P -v "aab" ab >actual && + git grep -P -v "abc" ab >actual && test_cmp expected actual ' @@ -686,39 +686,33 @@ test_expect_success LIBPCRE 'grep -P invalidpattern properly dies ' ' test_must_fail git grep -P "a[" ' -test_expect_success 'grep -F -E -G pattern' ' - echo 
ab:a+b >expected && - git grep -F -E -G a+b >actual && - test_cmp expected actual -' - -test_expect_success 'grep -F -G -E pattern' ' - echo ab:aab >expected && - git grep -F -G -E a+b >actual && +test_expect_success 'grep -G -E -F pattern' ' + echo "ab:a+b*c" >expected && + git grep -G -E -F "a+b*c" ab >actual && test_cmp expected actual ' test_expect_success 'grep -E -F -G pattern' ' - echo ab:aab >expected && - git grep -E -F -G a\\+b >actual && + echo "ab:a+bc" >expected && + git grep -E -F -G "a+b*c" ab >actual && test_cmp expected actual ' -test_expect_success 'grep -E -G -F pattern' ' - echo ab:a\\+b >expected && - git grep -E -G -F a\\+b >actual && +test_expect_success 'grep -F -G -E pattern' ' + echo "ab:abc" >expected && + git grep -F -G -E "a+b*c" ab >actual && test_cmp expected actual ' -test_expect_success 'grep -G -F -E pattern' ' - echo ab:a+b >expected && - git grep -G -F -E a\\+b >actual && - test_cmp expected actual +test_expect_success 'grep -G -F -P -E pattern' ' + >empty && + test_must_fail git grep -G -F -P -E "a\x{2b}b\x{2a}c" ab >actual && + test_cmp empty actual ' -test_expect_success LIBPCRE 'grep -E -G -F -P pattern' ' - echo ab:a+b >expected && - git grep -E -G -F -P a\\+b >actual && +test_expect_success LIBPCRE 'grep -G -F -E -P pattern' ' + echo "ab:a+b*c" >expected && + git grep -G -F -E -P "a\x{2b}b\x{2a}c" ab >actual && test_cmp expected actual '