From b4285c71bcc43c4cc9939b8d170aaf8ddb25f09f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 17 Jan 2009 16:50:13 +0100 Subject: [PATCH 1/4] Add ctype test Manipulating the character class table in ctype.c by hand is error prone. To ensure that typos are found quickly, add a test program and script. test-ctype checks the output of the character class macros isspace() et al. by applying them on all possible char values and consulting a list of all characters in the particular class. It doesn't check tolower() and toupper(); this could be added later. The test script t0070-fundamental.sh is created because there is no good place for the ctype test, yet -- except for t0000-basic.sh perhaps, but it doesn't run well on Windows, yet. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- Makefile | 12 +++++++- t/t0070-fundamental.sh | 15 ++++++++++ test-ctype.c | 66 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100755 t/t0070-fundamental.sh create mode 100644 test-ctype.c diff --git a/Makefile b/Makefile index dee97c1b01..20ba65d1e6 100644 --- a/Makefile +++ b/Makefile @@ -1356,7 +1356,15 @@ endif ### Testing rules -TEST_PROGRAMS = test-chmtime$X test-genrandom$X test-date$X test-delta$X test-sha1$X test-match-trees$X test-parse-options$X test-path-utils$X +TEST_PROGRAMS += test-chmtime$X +TEST_PROGRAMS += test-ctype$X +TEST_PROGRAMS += test-date$X +TEST_PROGRAMS += test-delta$X +TEST_PROGRAMS += test-genrandom$X +TEST_PROGRAMS += test-match-trees$X +TEST_PROGRAMS += test-parse-options$X +TEST_PROGRAMS += test-path-utils$X +TEST_PROGRAMS += test-sha1$X all:: $(TEST_PROGRAMS) @@ -1369,6 +1377,8 @@ export NO_SVN_TESTS test: all $(MAKE) -C t/ all +test-ctype$X: ctype.o + test-date$X: date.o ctype.o test-delta$X: diff-delta.o patch-delta.o diff --git a/t/t0070-fundamental.sh b/t/t0070-fundamental.sh new file mode 100755 index 0000000000..680d7d6861 --- /dev/null +++ 
b/t/t0070-fundamental.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +test_description='check that the most basic functions work + + +Verify wrappers and compatibility functions. +' + +. ./test-lib.sh + +test_expect_success 'character classes (isspace, isalpha etc.)' ' + test-ctype +' + +test_done diff --git a/test-ctype.c b/test-ctype.c new file mode 100644 index 0000000000..723eff4e96 --- /dev/null +++ b/test-ctype.c @@ -0,0 +1,66 @@ +#include "cache.h" + + +static int test_isdigit(int c) +{ + return isdigit(c); +} + +static int test_isspace(int c) +{ + return isspace(c); +} + +static int test_isalpha(int c) +{ + return isalpha(c); +} + +static int test_isalnum(int c) +{ + return isalnum(c); +} + +#define DIGIT "0123456789" +#define LOWER "abcdefghijklmnopqrstuvwxyz" +#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + +static const struct ctype_class { + const char *name; + int (*test_fn)(int); + const char *members; +} classes[] = { + { "isdigit", test_isdigit, DIGIT }, + { "isspace", test_isspace, " \n\r\t" }, + { "isalpha", test_isalpha, LOWER UPPER }, + { "isalnum", test_isalnum, LOWER UPPER DIGIT }, + { NULL } +}; + +static int test_class(const struct ctype_class *test) +{ + int i, rc = 0; + + for (i = 0; i < 256; i++) { + int expected = i ? !!strchr(test->members, i) : 0; + int actual = test->test_fn(i); + + if (actual != expected) { + rc = 1; + printf("%s classifies char %d (0x%02x) wrongly\n", + test->name, i, i); + } + } + return rc; +} + +int main(int argc, char **argv) +{ + const struct ctype_class *test; + int rc = 0; + + for (test = classes; test->name; test++) + rc |= test_class(test); + + return rc; +} From c841aa8b903200f5d7830c7c4ab8d62b5ef44c5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 17 Jan 2009 16:50:29 +0100 Subject: [PATCH 2/4] Reformat ctype.c Enhance the readability of ctype.c by using an enum instead of macros to initialize the character class table. 
This allows the use of a single letter to mark a char, making the table fit within 80 columns. Also list the index of the last entry in each row in the following comment. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- ctype.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/ctype.c b/ctype.c index 9208d674db..6528687000 100644 --- a/ctype.c +++ b/ctype.c @@ -5,25 +5,21 @@ */ #include "cache.h" -/* Just so that no insane platform contaminate namespace with these symbols */ -#undef SS -#undef AA -#undef DD -#undef GS - -#define SS GIT_SPACE -#define AA GIT_ALPHA -#define DD GIT_DIGIT -#define GS GIT_SPECIAL /* \0, *, ?, [, \\ */ +enum { + S = GIT_SPACE, + A = GIT_ALPHA, + D = GIT_DIGIT, + G = GIT_SPECIAL, /* \0, *, ?, [, \\ */ +}; unsigned char sane_ctype[256] = { - GS, 0, 0, 0, 0, 0, 0, 0, 0, SS, SS, 0, 0, SS, 0, 0, /* 0-15 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-15 */ - SS, 0, 0, 0, 0, 0, 0, 0, 0, 0, GS, 0, 0, 0, 0, 0, /* 32-15 */ - DD, DD, DD, DD, DD, DD, DD, DD, DD, DD, 0, 0, 0, 0, 0, GS, /* 48-15 */ - 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 64-15 */ - AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, GS, GS, 0, 0, 0, /* 80-15 */ - 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 96-15 */ - AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, 0, 0, 0, 0, 0, /* 112-15 */ + G, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ + S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */ + D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ + 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ + A, A, A, A, A, A, A, A, A, A, A, G, G, 0, 0, 0, /* 80.. 95 */ + 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ + A, A, A, A, A, A, A, A, A, A, A, 0, 0, 0, 0, 0, /* 112..127 */ /* Nothing in the 128.. 
range */ }; From 8cc32992624ed4140fb136d98675f0f19b20ba09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 17 Jan 2009 16:50:34 +0100 Subject: [PATCH 3/4] Change NUL char handling of isspecial() Replace isspecial() by the new macro is_glob_special(), which is more, well, specialized. The former included the NUL char in its character class, while the latter only includes characters that are special to file name globbing. The new name contains underscores because they enhance readability considerably now that it's made up of three words. Renaming the function is necessary to document its changed scope. The call sites of isspecial() are updated to check explicitly for NUL. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- ctype.c | 4 ++-- dir.c | 4 ++-- git-compat-util.h | 4 ++-- grep.c | 5 +++-- test-ctype.c | 6 ++++++ 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/ctype.c b/ctype.c index 6528687000..9de187c812 100644 --- a/ctype.c +++ b/ctype.c @@ -9,11 +9,11 @@ enum { S = GIT_SPACE, A = GIT_ALPHA, D = GIT_DIGIT, - G = GIT_SPECIAL, /* \0, *, ?, [, \\ */ + G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ }; unsigned char sane_ctype[256] = { - G, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */ D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 
63 */ diff --git a/dir.c b/dir.c index 0131983dfb..7ae1e2e75c 100644 --- a/dir.c +++ b/dir.c @@ -75,7 +75,7 @@ static int match_one(const char *match, const char *name, int namelen) for (;;) { unsigned char c1 = *match; unsigned char c2 = *name; - if (isspecial(c1)) + if (c1 == '\0' || is_glob_special(c1)) break; if (c1 != c2) return 0; @@ -680,7 +680,7 @@ static int simple_length(const char *match) for (;;) { unsigned char c = *match++; len++; - if (isspecial(c)) + if (c == '\0' || is_glob_special(c)) return len; } } diff --git a/git-compat-util.h b/git-compat-util.h index e20b1e858c..7c925881d9 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -327,13 +327,13 @@ extern unsigned char sane_ctype[256]; #define GIT_SPACE 0x01 #define GIT_DIGIT 0x02 #define GIT_ALPHA 0x04 -#define GIT_SPECIAL 0x08 +#define GIT_GLOB_SPECIAL 0x08 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) #define isspace(x) sane_istest(x,GIT_SPACE) #define isdigit(x) sane_istest(x,GIT_DIGIT) #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) -#define isspecial(x) sane_istest(x,GIT_SPECIAL) +#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) #define tolower(x) sane_case((unsigned char)(x), 0x20) #define toupper(x) sane_case((unsigned char)(x), 0) diff --git a/grep.c b/grep.c index 6485760ff3..f9a45258aa 100644 --- a/grep.c +++ b/grep.c @@ -30,8 +30,9 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, static int isregexspecial(int c) { - return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' || - c == '.' || c == '^' || c == '{' || c == '|'; + return c == '\0' || is_glob_special(c) || + c == '$' || c == '(' || c == ')' || c == '+' || + c == '.' 
|| c == '^' || c == '{' || c == '|'; } static int is_fixed(const char *s) diff --git a/test-ctype.c b/test-ctype.c index 723eff4e96..d6425d5b40 100644 --- a/test-ctype.c +++ b/test-ctype.c @@ -21,6 +21,11 @@ static int test_isalnum(int c) return isalnum(c); } +static int test_is_glob_special(int c) +{ + return is_glob_special(c); +} + #define DIGIT "0123456789" #define LOWER "abcdefghijklmnopqrstuvwxyz" #define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ" @@ -34,6 +39,7 @@ static const struct ctype_class { { "isspace", test_isspace, " \n\r\t" }, { "isalpha", test_isalpha, LOWER UPPER }, { "isalnum", test_isalnum, LOWER UPPER DIGIT }, + { "is_glob_special", test_is_glob_special, "*?[\\" }, { NULL } }; From f9b7cce61cbd19c99e89b859b5909f0741111185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 17 Jan 2009 16:50:37 +0100 Subject: [PATCH 4/4] Add is_regex_special() Add is_regex_special(), a character class macro for chars that have a special meaning in regular expressions. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- ctype.c | 7 ++++--- git-compat-util.h | 2 ++ grep.c | 9 +-------- test-ctype.c | 6 ++++++ 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/ctype.c b/ctype.c index 9de187c812..b90ec004f2 100644 --- a/ctype.c +++ b/ctype.c @@ -10,16 +10,17 @@ enum { A = GIT_ALPHA, D = GIT_DIGIT, G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ + R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ }; unsigned char sane_ctype[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ - S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */ + S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ - A, A, A, A, A, A, A, A, A, A, A, G, G, 0, 0, 0, /* 80.. 95 */ + A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 
95 */ 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ - A, A, A, A, A, A, A, A, A, A, A, 0, 0, 0, 0, 0, /* 112..127 */ + A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ /* Nothing in the 128.. range */ }; diff --git a/git-compat-util.h b/git-compat-util.h index 7c925881d9..079cbe9440 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -328,12 +328,14 @@ extern unsigned char sane_ctype[256]; #define GIT_DIGIT 0x02 #define GIT_ALPHA 0x04 #define GIT_GLOB_SPECIAL 0x08 +#define GIT_REGEX_SPECIAL 0x10 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) #define isspace(x) sane_istest(x,GIT_SPACE) #define isdigit(x) sane_istest(x,GIT_DIGIT) #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) +#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) #define tolower(x) sane_case((unsigned char)(x), 0x20) #define toupper(x) sane_case((unsigned char)(x), 0) diff --git a/grep.c b/grep.c index f9a45258aa..062b2b6f28 100644 --- a/grep.c +++ b/grep.c @@ -28,16 +28,9 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, p->next = NULL; } -static int isregexspecial(int c) -{ - return c == '\0' || is_glob_special(c) || - c == '$' || c == '(' || c == ')' || c == '+' || - c == '.' 
|| c == '^' || c == '{' || c == '|'; -} - static int is_fixed(const char *s) { - while (!isregexspecial(*s)) + while (*s && !is_regex_special(*s)) s++; return !*s; } diff --git a/test-ctype.c b/test-ctype.c index d6425d5b40..033c74911e 100644 --- a/test-ctype.c +++ b/test-ctype.c @@ -26,6 +26,11 @@ static int test_is_glob_special(int c) return is_glob_special(c); } +static int test_is_regex_special(int c) +{ + return is_regex_special(c); +} + #define DIGIT "0123456789" #define LOWER "abcdefghijklmnopqrstuvwxyz" #define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ" @@ -40,6 +45,7 @@ static const struct ctype_class { { "isalpha", test_isalpha, LOWER UPPER }, { "isalnum", test_isalnum, LOWER UPPER DIGIT }, { "is_glob_special", test_is_glob_special, "*?[\\" }, + { "is_regex_special", test_is_regex_special, "$()*+.?[\\^{|" }, { NULL } };