WSL2-Linux-Kernel/lib/glob.c

#include <linux/module.h>
#include <linux/glob.h>

/*
 * The only reason this code can be compiled as a module is that the
 * ATA code that depends on it can be as well.  In practice, they're
 * both usually compiled in and the module overhead goes away.
 */
MODULE_DESCRIPTION("glob(7) matching");
MODULE_LICENSE("Dual MIT/GPL");

/**
 * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
 * @pat: Shell-style pattern to match, e.g. "*.[ch]".
 * @str: String to match.  The pattern must match the entire string.
 *
 * Perform shell-style glob matching, returning true (1) if the match
 * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
 *
 * Pattern metacharacters are ?, *, [ and \.
 * (And, inside character classes, !, - and ].)
 *
 * This is a small and simple implementation intended for device blacklists
 * where a string is matched against a number of patterns.  Thus, it
 * does not preprocess the patterns.  It is non-recursive, and run-time
 * is at most quadratic: strlen(@str)*strlen(@pat).
 *
 * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
 * it takes 6 passes over the pattern before matching the string.
 *
 * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
 * treat / or leading . specially; it isn't actually used for pathnames.
 *
 * Note that according to glob(7) (and unlike bash), character classes
 * are complemented by a leading !; this does not support the regex-style
 * [^a-z] syntax.
 *
 * An opening bracket without a matching close is matched literally.
 *
 * Like ?, a character class (complemented or not) consumes exactly one
 * character of @str and therefore never matches the end of the string.
 *
 * Return: true if @pat matches all of @str, false otherwise.
 */
bool __pure glob_match(char const *pat, char const *str)
{
	/*
	 * Backtrack to previous * on mismatch and retry starting one
	 * character later in the string.  Because * matches all characters
	 * (no exception for /), it can be easily proved that there's
	 * never a need to backtrack multiple levels.
	 *
	 * back_str is only ever read after a * has set both pointers, so
	 * the NULL initializer is never dereferenced; it merely avoids
	 * reading an indeterminate value.
	 */
	char const *back_pat = NULL, *back_str = NULL;

	/*
	 * Loop over each token (character or class) in pat, matching
	 * it against the remaining unmatched tail of str.  Return false
	 * on mismatch, or true after matching the trailing nul bytes.
	 */
	for (;;) {
		unsigned char c = *str++;
		unsigned char d = *pat++;

		switch (d) {
		case '?':	/* Wildcard: anything but nul */
			if (c == '\0')
				return false;
			break;
		case '*':	/* Any-length wildcard */
			if (*pat == '\0')	/* Optimize trailing * case */
				return true;
			back_pat = pat;
			back_str = --str;	/* Allow zero-length match */
			break;
		case '[': {	/* Character class */
			bool match = false, inverted = (*pat == '!');
			char const *class = pat + inverted;
			unsigned char a = *class++;

			/*
			 * A class consumes one real character, so it can
			 * never match the terminating nul.  Without this
			 * check a complemented class such as [!a] would
			 * accept the nul, and the next iteration would read
			 * past the end of str.  Backtracking cannot help
			 * either: str is exhausted.
			 */
			if (c == '\0')
				return false;

			/*
			 * Iterate over each span in the character class.
			 * A span is either a single character a, or a
			 * range a-b.  The first span may begin with ']'.
			 */
			do {
				unsigned char b = a;

				if (a == '\0')	/* Malformed */
					goto literal;

				if (class[0] == '-' && class[1] != ']') {
					b = class[1];

					if (b == '\0')
						goto literal;

					class += 2;
					/* Any special action if a > b? */
				}
				match |= (a <= c && c <= b);
			} while ((a = *class++) != ']');

			if (match == inverted)
				goto backtrack;
			pat = class;
			}
			break;
		case '\\':
			d = *pat++;
			/*FALLTHROUGH*/
		default:	/* Literal character */
literal:
			if (c == d) {
				if (d == '\0')
					return true;
				break;
			}
backtrack:
			if (c == '\0' || !back_pat)
				return false;	/* No point continuing */
			/* Try again from last *, one character later in str. */
			pat = back_pat;
			str = ++back_str;
			break;
		}
	}
}
EXPORT_SYMBOL(glob_match);


#ifdef CONFIG_GLOB_SELFTEST

#include <linux/printk.h>
#include <linux/moduleparam.h>

/*
 * Self-test verbosity switch: boot with "glob.verbose=1" to have the
 * successful tests reported too.  Off unless set.
 */
static bool verbose;
module_param(verbose, bool, 0);

/*
 * One self-test case in structured form.
 * NOTE(review): nothing in the visible code references this type; the
 * tests are stored as a packed character array instead.
 */
struct glob_test {
	char const *pat;	/* Pattern to match */
	char const *str;	/* String to match against */
	bool expected;		/* Expected glob_match() result */
};

static bool __pure __init test(char const *pat, char const *str, bool expected)
{
	bool match = glob_match(pat, str);
	bool success = match == expected;

	/* Can't get string literals into a particular section, so... */
	static char const msg_error[] __initconst =
		KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
	static char const msg_ok[] __initconst =
		KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
	static char const mismatch[] __initconst = "mismatch";
	char const *message;

	if (!success)
		message = msg_error;
	else if (verbose)
		message = msg_ok;
	else
		return success;

	printk(message, pat, str, mismatch + 3*match);
	return success;
}

/*
 * The tests are all jammed together in one array to make it simpler
 * to place that array in the .init.rodata section.  The obvious
 * "array of structures containing char *" has no way to force the
 * pointed-to strings to be in a particular section.
 *
 * Anyway, a test consists of:
 * 1. Expected glob_match result: '1' or '0'.
 * 2. Pattern to match: null-terminated string
 * 3. String to match against: null-terminated string
 *
 * Each row below is three adjacent string literals, which the compiler
 * concatenates; the explicit "\0" escapes terminate the pattern and
 * the string of each test.
 *
 * The list of tests is terminated with a final '\0' instead of
 * a glob_match result character.  That byte is the implicit nul the
 * compiler appends to the concatenated literal.
 */
static char const glob_tests[] __initconst =
	/* Some basic tests */
	"1" "a\0" "a\0"
	"0" "a\0" "b\0"
	"0" "a\0" "aa\0"
	"0" "a\0" "\0"
	"1" "\0" "\0"
	"0" "\0" "a\0"
	/* Simple character class tests */
	"1" "[a]\0" "a\0"
	"0" "[a]\0" "b\0"
	"0" "[!a]\0" "a\0"
	"1" "[!a]\0" "b\0"
	"1" "[ab]\0" "a\0"
	"1" "[ab]\0" "b\0"
	"0" "[ab]\0" "c\0"
	"1" "[!ab]\0" "c\0"
	"1" "[a-c]\0" "b\0"
	"0" "[a-c]\0" "d\0"
	/* Corner cases in character class parsing */
	"1" "[a-c-e-g]\0" "-\0"
	"0" "[a-c-e-g]\0" "d\0"
	"1" "[a-c-e-g]\0" "f\0"
	"1" "[]a-ceg-ik[]\0" "a\0"
	"1" "[]a-ceg-ik[]\0" "]\0"
	"1" "[]a-ceg-ik[]\0" "[\0"
	"1" "[]a-ceg-ik[]\0" "h\0"
	"0" "[]a-ceg-ik[]\0" "f\0"
	"0" "[!]a-ceg-ik[]\0" "h\0"
	"0" "[!]a-ceg-ik[]\0" "]\0"
	"1" "[!]a-ceg-ik[]\0" "f\0"
	/* Simple wild cards */
	"1" "?\0" "a\0"
	"0" "?\0" "aa\0"
	"0" "??\0" "a\0"
	"1" "?x?\0" "axb\0"
	"0" "?x?\0" "abx\0"
	"0" "?x?\0" "xab\0"
	/* Asterisk wild cards (backtracking) */
	"0" "*??\0" "a\0"
	"1" "*??\0" "ab\0"
	"1" "*??\0" "abc\0"
	"1" "*??\0" "abcd\0"
	"0" "??*\0" "a\0"
	"1" "??*\0" "ab\0"
	"1" "??*\0" "abc\0"
	"1" "??*\0" "abcd\0"
	"0" "?*?\0" "a\0"
	"1" "?*?\0" "ab\0"
	"1" "?*?\0" "abc\0"
	"1" "?*?\0" "abcd\0"
	"1" "*b\0" "b\0"
	"1" "*b\0" "ab\0"
	"0" "*b\0" "ba\0"
	"1" "*b\0" "bb\0"
	"1" "*b\0" "abb\0"
	"1" "*b\0" "bab\0"
	"1" "*bc\0" "abbc\0"
	"1" "*bc\0" "bc\0"
	"1" "*bc\0" "bbc\0"
	"1" "*bc\0" "bcbc\0"
	/* Multiple asterisks (complex backtracking) */
	"1" "*ac*\0" "abacadaeafag\0"
	"1" "*ac*ae*ag*\0" "abacadaeafag\0"
	"1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
	"0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
	"1" "*abcd*\0" "abcabcabcabcdefg\0"
	"1" "*ab*cd*\0" "abcabcabcabcdefg\0"
	"1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
	"0" "*abcd*\0" "abcabcabcabcefg\0"
	"0" "*ab*cd*\0" "abcabcabcabcefg\0";

/*
 * Module init: walk the packed glob_tests table, run every case through
 * test(), and log how many passed and failed.
 *
 * Returns 0 when every test passed, -ECANCELED otherwise.
 */
static int __init glob_init(void)
{
	static char const message[] __initconst =
		KERN_INFO "glob: %u self-tests passed, %u failed\n";
	char const *cursor = glob_tests;
	unsigned passed = 0;
	unsigned total = 0;
	unsigned failed;

	/*
	 * Each record is one result byte ('1' or '0') followed by two
	 * null-terminated strings: the pattern, then the string to match
	 * it against.  A '\0' in the result position ends the table.
	 */
	for (; *cursor != '\0'; total++) {
		/* The low bit of the ASCII digit is the expected result. */
		bool expected = (*cursor & 1) != 0;
		char const *pat = cursor + 1;
		char const *str = pat + strlen(pat) + 1;

		if (test(pat, str, expected))
			passed++;

		/* Step over the string and its nul to the next record. */
		cursor = str + strlen(str) + 1;
	}

	failed = total - passed;
	printk(message, passed, failed);

	/* What's the errno for "kernel bug detected"?  Guess... */
	if (failed)
		return -ECANCELED;
	return 0;
}

/*
 * Nothing to tear down, but an exit function has to exist for the
 * module to be unloadable.
 */
static void __exit glob_fini(void)
{
}

module_init(glob_init);
module_exit(glob_fini);

#endif /* CONFIG_GLOB_SELFTEST */
lib: add lib/glob.c This is a helper function from drivers/ata/libata_core.c, where it is used to blacklist particular device models. It's being moved to lib/ so other drivers may use it for the same purpose. This implementation is non-recursive, so is safe for the kernel stack. [akpm@linux-foundation.org: fix sparse warning] Signed-off-by: George Spelvin <linux@horizon.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Tejun Heo <tj@kernel.org> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2014-08-07 03:09:23 +04:00			`#include <linux/module.h>`
			`#include <linux/glob.h>`

			`/*`
			`* The only reason this code can be compiled as a module is because the`
			`* ATA code that depends on it can be as well. In practice, they're`
			`* both usually compiled in and the module overhead goes away.`
			`*/`
			`MODULE_DESCRIPTION("glob(7) matching");`
			`MODULE_LICENSE("Dual MIT/GPL");`

			`/**`
			`* glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)`
			`* @pat: Shell-style pattern to match, e.g. "*.[ch]".`
			`* @str: String to match. The pattern must match the entire string.`
			`*`
			`* Perform shell-style glob matching, returning true (1) if the match`
			`* succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).`
			`*`
			`* Pattern metacharacters are ?, *, [ and \.`
			`* (And, inside character classes, !, - and ].)`
			`*`
			`* This is small and simple implementation intended for device blacklists`
			`* where a string is matched against a number of patterns. Thus, it`
			`* does not preprocess the patterns. It is non-recursive, and run-time`
			`* is at most quadratic: strlen(@str)*strlen(@pat).`
			`*`
			`* An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");`
			`* it takes 6 passes over the pattern before matching the string.`
			`*`
			`* Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT`
			`* treat / or leading . specially; it isn't actually used for pathnames.`
			`*`
			`* Note that according to glob(7) (and unlike bash), character classes`
			`* are complemented by a leading !; this does not support the regex-style`
			`* [^a-z] syntax.`
			`*`
			`* An opening bracket without a matching close is matched literally.`
			`*/`
			`bool __pure glob_match(char const *pat, char const *str)`
			`{`
			`/*`
			`* Backtrack to previous * on mismatch and retry starting one`
			`* character later in the string. Because * matches all characters`
			`* (no exception for /), it can be easily proved that there's`
			`* never a need to backtrack multiple levels.`
			`*/`
			`char const *back_pat = NULL, *back_str = back_str;`

			`/*`
			`* Loop over each token (character or class) in pat, matching`
			`* it against the remaining unmatched tail of str. Return false`
			`* on mismatch, or true after matching the trailing nul bytes.`
			`*/`
			`for (;;) {`
			`unsigned char c = *str++;`
			`unsigned char d = *pat++;`

			`switch (d) {`
			`case '?': /* Wildcard: anything but nul */`
			`if (c == '\0')`
			`return false;`
			`break;`
			`case '*': /* Any-length wildcard */`
			`if (*pat == '\0') /* Optimize trailing * case */`
			`return true;`
			`back_pat = pat;`
			`back_str = --str; /* Allow zero-length match */`
			`break;`
			`case '[': { /* Character class */`
			`bool match = false, inverted = (*pat == '!');`
			`char const *class = pat + inverted;`
			`unsigned char a = *class++;`

			`/*`
			`* Iterate over each span in the character class.`
			`* A span is either a single character a, or a`
			`* range a-b. The first span may begin with ']'.`
			`*/`
			`do {`
			`unsigned char b = a;`

			`if (a == '\0') /* Malformed */`
			`goto literal;`

			`if (class[0] == '-' && class[1] != ']') {`
			`b = class[1];`

			`if (b == '\0')`
			`goto literal;`

			`class += 2;`
			`/* Any special action if a > b? */`
			`}`
			`match |= (a <= c && c <= b);`
			`} while ((a = *class++) != ']');`

			`if (match == inverted)`
			`goto backtrack;`
			`pat = class;`
			`}`
			`break;`
			`case '\\':`
			`d = *pat++;`
			`/*FALLTHROUGH*/`
			`default: /* Literal character */`
			`literal:`
			`if (c == d) {`
			`if (d == '\0')`
			`return true;`
			`break;`
			`}`
			`backtrack:`
			`if (c == '\0' || !back_pat)`
			`return false; /* No point continuing */`
			`/* Try again from last *, one character later in str. */`
			`pat = back_pat;`
			`str = ++back_str;`
			`break;`
			`}`
			`}`
			`}`
			`EXPORT_SYMBOL(glob_match);`
lib/glob.c: add CONFIG_GLOB_SELFTEST This was useful during development, and is retained for future regression testing. GCC appears to have no way to place string literals in a particular section; adding __initconst to a char pointer leaves the string itself in the default string section, where it will not be thrown away after module load. Thus all string constants are kept in explicitly declared and named arrays. Sorry this makes printk a bit harder to read. At least the tests are more compact. Signed-off-by: George Spelvin <linux@horizon.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Tejun Heo <tj@kernel.org> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2014-08-07 03:09:25 +04:00

			`#ifdef CONFIG_GLOB_SELFTEST`

			`#include <linux/printk.h>`
			`#include <linux/moduleparam.h>`

			`/* Boot with "glob.verbose=1" to show successful tests, too */`
			`static bool verbose = false;`
			`module_param(verbose, bool, 0);`

			`struct glob_test {`
			`char const *pat, *str;`
			`bool expected;`
			`};`

			`static bool __pure __init test(char const *pat, char const *str, bool expected)`
			`{`
			`bool match = glob_match(pat, str);`
			`bool success = match == expected;`

			`/* Can't get string literals into a particular section, so... */`
			`static char const msg_error[] __initconst =`
			`KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";`
			`static char const msg_ok[] __initconst =`
			`KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";`
			`static char const mismatch[] __initconst = "mismatch";`
			`char const *message;`

			`if (!success)`
			`message = msg_error;`
			`else if (verbose)`
			`message = msg_ok;`
			`else`
			`return success;`

			`printk(message, pat, str, mismatch + 3*match);`
			`return success;`
			`}`

			`/*`
			`* The tests are all jammed together in one array to make it simpler`
			`* to place that array in the .init.rodata section. The obvious`
			`* "array of structures containing char *" has no way to force the`
			`* pointed-to strings to be in a particular section.`
			`*`
			`* Anyway, a test consists of:`
			`* 1. Expected glob_match result: '1' or '0'.`
			`* 2. Pattern to match: null-terminated string`
			`* 3. String to match against: null-terminated string`
			`*`
			`* The list of tests is terminated with a final '\0' instead of`
			`* a glob_match result character.`
			`*/`
			`static char const glob_tests[] __initconst =`
			`/* Some basic tests */`
			`"1" "a\0" "a\0"`
			`"0" "a\0" "b\0"`
			`"0" "a\0" "aa\0"`
			`"0" "a\0" "\0"`
			`"1" "\0" "\0"`
			`"0" "\0" "a\0"`
			`/* Simple character class tests */`
			`"1" "[a]\0" "a\0"`
			`"0" "[a]\0" "b\0"`
			`"0" "[!a]\0" "a\0"`
			`"1" "[!a]\0" "b\0"`
			`"1" "[ab]\0" "a\0"`
			`"1" "[ab]\0" "b\0"`
			`"0" "[ab]\0" "c\0"`
			`"1" "[!ab]\0" "c\0"`
			`"1" "[a-c]\0" "b\0"`
			`"0" "[a-c]\0" "d\0"`
			`/* Corner cases in character class parsing */`
			`"1" "[a-c-e-g]\0" "-\0"`
			`"0" "[a-c-e-g]\0" "d\0"`
			`"1" "[a-c-e-g]\0" "f\0"`
			`"1" "[]a-ceg-ik[]\0" "a\0"`
			`"1" "[]a-ceg-ik[]\0" "]\0"`
			`"1" "[]a-ceg-ik[]\0" "[\0"`
			`"1" "[]a-ceg-ik[]\0" "h\0"`
			`"0" "[]a-ceg-ik[]\0" "f\0"`
			`"0" "[!]a-ceg-ik[]\0" "h\0"`
			`"0" "[!]a-ceg-ik[]\0" "]\0"`
			`"1" "[!]a-ceg-ik[]\0" "f\0"`
			`/* Simple wild cards */`
			`"1" "?\0" "a\0"`
			`"0" "?\0" "aa\0"`
			`"0" "??\0" "a\0"`
			`"1" "?x?\0" "axb\0"`
			`"0" "?x?\0" "abx\0"`
			`"0" "?x?\0" "xab\0"`
			`/* Asterisk wild cards (backtracking) */`
			`"0" "*??\0" "a\0"`
			`"1" "*??\0" "ab\0"`
			`"1" "*??\0" "abc\0"`
			`"1" "*??\0" "abcd\0"`
			`"0" "??*\0" "a\0"`
			`"1" "??*\0" "ab\0"`
			`"1" "??*\0" "abc\0"`
			`"1" "??*\0" "abcd\0"`
			`"0" "?*?\0" "a\0"`
			`"1" "?*?\0" "ab\0"`
			`"1" "?*?\0" "abc\0"`
			`"1" "?*?\0" "abcd\0"`
			`"1" "*b\0" "b\0"`
			`"1" "*b\0" "ab\0"`
			`"0" "*b\0" "ba\0"`
			`"1" "*b\0" "bb\0"`
			`"1" "*b\0" "abb\0"`
			`"1" "*b\0" "bab\0"`
			`"1" "*bc\0" "abbc\0"`
			`"1" "*bc\0" "bc\0"`
			`"1" "*bc\0" "bbc\0"`
			`"1" "*bc\0" "bcbc\0"`
			`/* Multiple asterisks (complex backtracking) */`
			`"1" "*ac*\0" "abacadaeafag\0"`
			`"1" "*ac*ae*ag*\0" "abacadaeafag\0"`
			`"1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"`
			`"0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"`
			`"1" "*abcd*\0" "abcabcabcabcdefg\0"`
			`"1" "*ab*cd*\0" "abcabcabcabcdefg\0"`
			`"1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"`
			`"0" "*abcd*\0" "abcabcabcabcefg\0"`
			`"0" "*ab*cd*\0" "abcabcabcabcefg\0";`

			`static int __init glob_init(void)`
			`{`
			`unsigned successes = 0;`
			`unsigned n = 0;`
			`char const *p = glob_tests;`
			`static char const message[] __initconst =`
			`KERN_INFO "glob: %u self-tests passed, %u failed\n";`

			`/*`
			`* Tests are jammed together in a string. The first byte is '1'`
			`* or '0' to indicate the expected outcome, or '\0' to indicate the`
			`* end of the tests. Then come two null-terminated strings: the`
			`* pattern and the string to match it against.`
			`*/`
			`while (*p) {`
			`bool expected = *p++ & 1;`
			`char const *pat = p;`

			`p += strlen(p) + 1;`
			`successes += test(pat, p, expected);`
			`p += strlen(p) + 1;`
			`n++;`
			`}`

			`n -= successes;`
			`printk(message, successes, n);`

			`/* What's the errno for "kernel bug detected"? Guess... */`
			`return n ? -ECANCELED : 0;`
			`}`

			`/* We need a dummy exit function to allow unload */`
			`static void __exit glob_fini(void) { }`

			`module_init(glob_init);`
			`module_exit(glob_fini);`

			`#endif /* CONFIG_GLOB_SELFTEST */`