diff --git a/ChangeLog b/ChangeLog index 17284bdea7..ea03e547a0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Wed Oct 29 17:27:05 2003 Tanaka Akira + + * re.c (rb_reg_s_union, Init_Regexp): new method `Regexp.union'. + + * lib/pathname.rb (realpath): examine Dir.pwd because it may have + symlinks. + Wed Oct 29 17:16:31 2003 Nobuyoshi Nakada * eval.c (rb_longjmp): must not disturb original jump. diff --git a/lib/pathname.rb b/lib/pathname.rb index 4822a4a172..b4462bfeaf 100644 --- a/lib/pathname.rb +++ b/lib/pathname.rb @@ -105,27 +105,29 @@ class Pathname Pathname.new(path) end - # realpath returns real pathname of self in actual filesystem. + # realpath returns a real pathname of self in actual filesystem. + # The real pathname doesn't contain a symlink and useless dots. # # If false is given for the optional argument force_absolute, # it may return relative pathname. # Otherwise it returns absolute pathname. def realpath(force_absolute=true) - # Check file existence at first by File.stat. - # This test detects ELOOP. - # - # /tmp/a -> a - # /tmp/b -> b/b - # /tmp/c -> ./c - # /tmp/d -> ../tmp/d - - File.stat(@path) - - top = %r{\A/} =~ @path ? '/' : '' - unresolved = @path.scan(%r{[^/]+}) + if %r{\A/} =~ @path + top = '/' + unresolved = @path.scan(%r{[^/]+}) + elsif force_absolute + # Although POSIX getcwd returns a pathname which contains no symlink, + # 4.4BSD-Lite2 derived getcwd may return the environment variable $PWD + # which may contain a symlink. + # So the return value of Dir.pwd should be examined. + top = '/' + unresolved = Dir.pwd.scan(%r{[^/]+}) + @path.scan(%r{[^/]+}) + else + top = '' + unresolved = @path.scan(%r{[^/]+}) + end resolved = [] - checked_path = {} - + until unresolved.empty? case unresolved.last when '.' @@ -133,34 +135,36 @@ class Pathname when '..' 
resolved.unshift unresolved.pop else - path = top + unresolved.join('/') - raise Errno::ELOOP.new(path) if checked_path[path] - checked_path[path] = true - if File.lstat(path).symlink? - link = File.readlink(path) - if %r{\A/} =~ link + loop_check = {} + while (stat = File.lstat(path = top + unresolved.join('/'))).symlink? + symlink_id = "#{stat.dev}:#{stat.ino}" + raise Errno::ELOOP.new(path) if loop_check[symlink_id] + loop_check[symlink_id] = true + if %r{\A/} =~ (link = File.readlink(path)) top = '/' unresolved = link.scan(%r{[^/]+}) else - unresolved.pop - unresolved.concat link.scan(%r{[^/]+}) + unresolved[-1,1] = link.scan(%r{[^/]+}) end + end + next if (filename = unresolved.pop) == '.' + if filename != '..' && resolved.first == '..' + resolved.shift else - resolved.unshift unresolved.pop + resolved.unshift filename end end end - - if resolved.empty? - path = top.empty? ? '.' : top - else - path = top + resolved.join('/') + + if top == '/' + resolved.shift while resolved[0] == '..' end - # Note that Dir.pwd has no symlinks. - path = File.join(Dir.pwd, path) if %r{\A/} !~ path && force_absolute - - Pathname.new(path).cleanpath + if resolved.empty? + Pathname.new(top.empty? ? '.' : '/') + else + Pathname.new(top + resolved.join('/')) + end end # parent method returns parent directory, i.e. ".." is joined at last. @@ -297,7 +301,7 @@ end # IO class Pathname # Pathname#each_line iterates over lines of the file. - # It's yields String objects for each line. + # It yields a String object for each line. # # This method is exist since 1.8.1. 
def each_line(*args, &block) IO.foreach(@path, *args, &block) end diff --git a/re.c b/re.c index 7460d6ae3d..9fb770015a 100644 --- a/re.c +++ b/re.c @@ -1484,6 +1484,82 @@ rb_reg_options(re) return options; } +static VALUE +rb_reg_s_union(argc, argv) + int argc; + VALUE *argv; +{ + if (argc == 0) { + VALUE args[1]; + args[0] = rb_str_new2("(?!)"); + return rb_class_new_instance(1, args, rb_cRegexp); + } + else if (argc == 1) { + VALUE v; + v = rb_check_convert_type(argv[0], T_REGEXP, "Regexp", "to_regexp"); + if (!NIL_P(v)) + return v; + else { + VALUE args[1]; + args[0] = rb_reg_s_quote(argc, argv); + return rb_class_new_instance(1, args, rb_cRegexp); + } + } + else { + int i, kcode = -1; + VALUE kcode_re = Qnil; + VALUE source = rb_str_buf_new(0); + VALUE args[3]; + for (i = 0; i < argc; i++) { + volatile VALUE v; + if (0 < i) + rb_str_buf_cat2(source, "|"); + v = rb_check_convert_type(argv[i], T_REGEXP, "Regexp", "to_regexp"); + if (!NIL_P(v)) { + if (FL_TEST(v, KCODE_FIXED)) { + if (kcode == -1) { + kcode_re = v; + kcode = RBASIC(v)->flags & KCODE_MASK; + } + else if ((RBASIC(v)->flags & KCODE_MASK) != kcode) { + volatile VALUE str1, str2; + str1 = rb_inspect(kcode_re); + str2 = rb_inspect(v); + rb_raise(rb_eArgError, "mixed kcode: %s and %s", + RSTRING(str1)->ptr, RSTRING(str2)->ptr); + } + } + v = rb_reg_to_s(v); + } + else { + args[0] = argv[i]; + v = rb_reg_s_quote(1, args); + } + rb_str_buf_append(source, v); + } + args[0] = source; + args[1] = Qnil; + switch (kcode) { + case -1: + args[2] = Qnil; + break; + case KCODE_NONE: + args[2] = rb_str_new2("n"); + break; + case KCODE_EUC: + args[2] = rb_str_new2("e"); + break; + case KCODE_SJIS: + args[2] = rb_str_new2("s"); + break; + case KCODE_UTF8: + args[2] = rb_str_new2("u"); + break; + } + return rb_class_new_instance(3, args, rb_cRegexp); + } +} + static VALUE rb_reg_init_copy(copy, re) VALUE copy, re; @@ -1732,6 +1808,7 @@ Init_Regexp() rb_define_singleton_method(rb_cRegexp, "compile", 
rb_class_new_instance, -1); rb_define_singleton_method(rb_cRegexp, "quote", rb_reg_s_quote, -1); rb_define_singleton_method(rb_cRegexp, "escape", rb_reg_s_quote, -1); + rb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union, -1); rb_define_singleton_method(rb_cRegexp, "last_match", rb_reg_s_last_match, -1); rb_define_method(rb_cRegexp, "initialize", rb_reg_initialize_m, -1);