dir.c: performance fix with braces

Braces were expended before ruby_glob0(). This caused to call
replace_real_basename() for same plain patterns repeatedly.
Move blace expansion into glob_helper() in ruby_glob0() to reduce
replace_real_basename() call.
This fix changes the order of glob results.
[Feature #13167] [Fix GH-1864]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@64810 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
shirosaki 2018-09-22 01:11:40 +00:00
Родитель c61adf51a3
Коммит 2a11904214
1 изменённых файлов: 138 добавлений и 30 удалений

168
dir.c
Просмотреть файл

@ -1534,7 +1534,7 @@ do_opendir(const int basefd, size_t baselen, const char *path, int flags, rb_enc
} }
/* Globing pattern */ /* Globing pattern */
enum glob_pattern_type { PLAIN, ALPHA, MAGICAL, RECURSIVE, MATCH_ALL, MATCH_DIR }; enum glob_pattern_type { PLAIN, ALPHA, BRACE, MAGICAL, RECURSIVE, MATCH_ALL, MATCH_DIR };
/* Return nonzero if S has any special globbing chars in it. */ /* Return nonzero if S has any special globbing chars in it. */
static enum glob_pattern_type static enum glob_pattern_type
@ -1542,15 +1542,20 @@ has_magic(const char *p, const char *pend, int flags, rb_encoding *enc)
{ {
const int escape = !(flags & FNM_NOESCAPE); const int escape = !(flags & FNM_NOESCAPE);
int hasalpha = 0; int hasalpha = 0;
int hasmagical = 0;
register char c; register char c;
while (p < pend && (c = *p++) != 0) { while (p < pend && (c = *p++) != 0) {
switch (c) { switch (c) {
case '{':
return BRACE;
case '*': case '*':
case '?': case '?':
case '[': case '[':
return MAGICAL; hasmagical = 1;
break;
case '\\': case '\\':
if (escape && p++ >= pend) if (escape && p++ >= pend)
@ -1575,7 +1580,7 @@ has_magic(const char *p, const char *pend, int flags, rb_encoding *enc)
p = Next(p-1, pend, enc); p = Next(p-1, pend, enc);
} }
return hasalpha ? ALPHA : PLAIN; return hasmagical ? MAGICAL : hasalpha ? ALPHA : PLAIN;
} }
/* Find separator in globbing pattern. */ /* Find separator in globbing pattern. */
@ -1596,6 +1601,13 @@ find_dirsep(const char *p, const char *pend, int flags, rb_encoding *enc)
open = 0; open = 0;
continue; continue;
case '{':
open = 1;
continue;
case '}':
open = 0;
continue;
case '/': case '/':
if (!open) if (!open)
return (char *)p-1; return (char *)p-1;
@ -1671,6 +1683,21 @@ glob_make_pattern(const char *p, const char *e, int flags, rb_encoding *enc)
const enum glob_pattern_type non_magic = (USE_NAME_ON_FS || FNM_SYSCASE) ? PLAIN : ALPHA; const enum glob_pattern_type non_magic = (USE_NAME_ON_FS || FNM_SYSCASE) ? PLAIN : ALPHA;
char *buf; char *buf;
if (magic == BRACE) {
/* brace pattern is parsed after expansion */
buf = GLOB_ALLOC_N(char, e-p+1);
if (!buf) {
GLOB_FREE(tmp);
goto error;
}
memcpy(buf, p, e-p);
buf[e-p] = '\0';
tmp->type = BRACE;
tmp->str = buf;
*tail = tmp;
tmp->next = 0;
return list;
}
if (!(FNM_SYSCASE || magic > non_magic) && !recursive && *m) { if (!(FNM_SYSCASE || magic > non_magic) && !recursive && *m) {
const char *m2; const char *m2;
while (has_magic(m+1, m2 = find_dirsep(m+1, e, flags, enc), flags, enc) <= non_magic && while (has_magic(m+1, m2 = find_dirsep(m+1, e, flags, enc), flags, enc) <= non_magic &&
@ -2001,6 +2028,58 @@ dirent_match(const char *pat, rb_encoding *enc, const char *name, const struct d
return 0; return 0;
} }
struct push_glob_args {
int fd;
const char *path;
size_t baselen;
size_t namelen;
int dirsep; /* '/' should be placed before appending child entry's name to 'path'. */
rb_pathtype_t pathtype; /* type of 'path' */
int flags;
const ruby_glob_funcs_t *funcs;
VALUE arg;
};
struct dirent_brace_args {
const char *name;
const struct dirent *dp;
int flags;
};
static int
dirent_match_brace(const char *pattern, VALUE val, void *enc)
{
struct dirent_brace_args *arg = (struct dirent_brace_args *)val;
return dirent_match(pattern, enc, arg->name, arg->dp, arg->flags);
}
/* join paths from pattern list of glob_make_pattern() */
static const char*
join_path_from_pattern(struct glob_pattern **beg)
{
struct glob_pattern *p;
const char *path = "";
for (p = *beg; p; p = p->next) {
const char *str;
switch (p->type) {
case RECURSIVE:
str = "**";
break;
default:
str = p->str;
}
path = join_path(path, strlen(path), (p != *beg), str, strlen(str));
}
return path;
}
static int push_caller(const char *path, VALUE val, void *enc);
static int ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg,
rb_encoding *enc, VALUE var);
static int static int
glob_helper( glob_helper(
int fd, int fd,
@ -2019,7 +2098,7 @@ glob_helper(
struct stat st; struct stat st;
int status = 0; int status = 0;
struct glob_pattern **cur, **new_beg, **new_end; struct glob_pattern **cur, **new_beg, **new_end;
int plain = 0, magical = 0, recursive = 0, match_all = 0, match_dir = 0; int plain = 0, brace = 0, magical = 0, recursive = 0, match_all = 0, match_dir = 0;
int escape = !(flags & FNM_NOESCAPE); int escape = !(flags & FNM_NOESCAPE);
size_t pathlen = baselen + namelen; size_t pathlen = baselen + namelen;
@ -2040,6 +2119,9 @@ glob_helper(
magical = 1; magical = 1;
#endif #endif
break; break;
case BRACE:
brace = 1;
break;
case MAGICAL: case MAGICAL:
magical = 2; magical = 2;
break; break;
@ -2054,6 +2136,20 @@ glob_helper(
} }
} }
if (brace) {
struct push_glob_args args;
const char* brace_path = join_path_from_pattern(beg);
args.fd = fd;
args.path = path;
args.baselen = baselen;
args.namelen = namelen;
args.dirsep = dirsep;
args.flags = flags;
args.funcs = funcs;
args.arg = arg;
return ruby_brace_expand(brace_path, flags, push_caller, (VALUE)&args, enc, Qfalse);
}
if (*path) { if (*path) {
if (match_all && pathtype == path_unknown) { if (match_all && pathtype == path_unknown) {
if (do_lstat(fd, baselen, path, &st, flags, enc) == 0) { if (do_lstat(fd, baselen, path, &st, flags, enc) == 0) {
@ -2191,6 +2287,7 @@ glob_helper(
for (cur = beg; cur < end; ++cur) { for (cur = beg; cur < end; ++cur) {
struct glob_pattern *p = *cur; struct glob_pattern *p = *cur;
struct dirent_brace_args args;
if (p->type == RECURSIVE) { if (p->type == RECURSIVE) {
if (new_pathtype == path_directory || /* not symlink but real directory */ if (new_pathtype == path_directory || /* not symlink but real directory */
new_pathtype == path_exist) { new_pathtype == path_exist) {
@ -2200,6 +2297,14 @@ glob_helper(
p = p->next; /* 0 times recursion */ p = p->next; /* 0 times recursion */
} }
switch (p->type) { switch (p->type) {
case BRACE:
args.name = name;
args.dp = dp;
args.flags = flags;
if (ruby_brace_expand(p->str, flags, dirent_match_brace,
(VALUE)&args, enc, Qfalse) > 0)
*new_end++ = p->next;
break;
case ALPHA: case ALPHA:
# if USE_NAME_ON_FS == USE_NAME_ON_FS_BY_FNMATCH # if USE_NAME_ON_FS == USE_NAME_ON_FS_BY_FNMATCH
if (plainname) { if (plainname) {
@ -2297,6 +2402,24 @@ glob_helper(
return status; return status;
} }
static int
push_caller(const char *path, VALUE val, void *enc)
{
struct push_glob_args *arg = (struct push_glob_args *)val;
struct glob_pattern *list;
int status;
list = glob_make_pattern(path, path + strlen(path), arg->flags, enc);
if (!list) {
return -1;
}
status = glob_helper(arg->fd, arg->path, arg->baselen, arg->namelen, arg->dirsep,
arg->pathtype, &list, &list + 1, arg->flags, arg->funcs,
arg->arg, enc);
glob_free_pattern(list);
return status;
}
static int static int
ruby_glob0(const char *path, int fd, const char *base, int flags, ruby_glob0(const char *path, int fd, const char *base, int flags,
const ruby_glob_funcs_t *funcs, VALUE arg, const ruby_glob_funcs_t *funcs, VALUE arg,
@ -2489,25 +2612,11 @@ ruby_brace_glob(const char *str, int flags, ruby_glob_func *func, VALUE arg)
return ruby_brace_glob_with_enc(str, flags, func, arg, rb_ascii8bit_encoding()); return ruby_brace_glob_with_enc(str, flags, func, arg, rb_ascii8bit_encoding());
} }
struct push_glob_args {
struct glob_args glob;
int flags;
int fd;
};
static int
push_caller(const char *path, VALUE val, void *enc)
{
struct push_glob_args *arg = (struct push_glob_args *)val;
return ruby_glob0(path, arg->fd, arg->glob.base, arg->flags, &rb_glob_funcs,
(VALUE)&arg->glob, enc);
}
static int static int
push_glob(VALUE ary, VALUE str, VALUE base, int flags) push_glob(VALUE ary, VALUE str, VALUE base, int flags)
{ {
struct push_glob_args args; struct glob_args args;
int fd;
rb_encoding *enc = rb_enc_get(str); rb_encoding *enc = rb_enc_get(str);
#if defined _WIN32 || defined __APPLE__ #if defined _WIN32 || defined __APPLE__
@ -2518,30 +2627,29 @@ push_glob(VALUE ary, VALUE str, VALUE base, int flags)
if (rb_enc_to_index(enc) == ENCINDEX_US_ASCII) if (rb_enc_to_index(enc) == ENCINDEX_US_ASCII)
enc = rb_ascii8bit_encoding(); enc = rb_ascii8bit_encoding();
flags |= GLOB_VERBOSE; flags |= GLOB_VERBOSE;
args.glob.func = push_pattern; args.func = push_pattern;
args.glob.value = ary; args.value = ary;
args.glob.enc = enc; args.enc = enc;
args.glob.base = 0; args.base = 0;
args.flags = flags; fd = AT_FDCWD;
args.fd = AT_FDCWD;
if (!NIL_P(base)) { if (!NIL_P(base)) {
if (!RB_TYPE_P(base, T_STRING) || !rb_enc_check(str, base)) { if (!RB_TYPE_P(base, T_STRING) || !rb_enc_check(str, base)) {
struct dir_data *dirp = DATA_PTR(base); struct dir_data *dirp = DATA_PTR(base);
if (!dirp->dir) dir_closed(); if (!dirp->dir) dir_closed();
#ifdef HAVE_DIRFD #ifdef HAVE_DIRFD
if ((args.fd = dirfd(dirp->dir)) == -1) if ((fd = dirfd(dirp->dir)) == -1)
rb_sys_fail_path(dir_inspect(base)); rb_sys_fail_path(dir_inspect(base));
#endif #endif
base = dirp->path; base = dirp->path;
} }
args.glob.base = RSTRING_PTR(base); args.base = RSTRING_PTR(base);
} }
#if defined _WIN32 || defined __APPLE__ #if defined _WIN32 || defined __APPLE__
enc = rb_utf8_encoding(); enc = rb_utf8_encoding();
#endif #endif
return ruby_brace_expand(RSTRING_PTR(str), flags, return ruby_glob0(RSTRING_PTR(str), fd, args.base, flags, &rb_glob_funcs,
push_caller, (VALUE)&args, enc, str); (VALUE)&args, enc);
} }
static VALUE static VALUE