* file.c (rb_str_normalize_ospath):

HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which
  U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through
  U+2FAFF are not decomposed (this avoids problems with round trip
  conversions from old Mac text encodings).
  http://developer.apple.com/library/mac/qa/qa1173/_index.html
  Therefore fix r42457 to exclude these ranges.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@42498 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2013-08-10 20:44:10 +00:00
Родитель 06ed1c781a
Коммит 9962aad7b0
8 изменённых файлов: 183 добавлений и 4 удалений

Просмотреть файл

@ -1,3 +1,13 @@
Sun Aug 11 04:48:14 2013 NARUSE, Yui <naruse@ruby-lang.org>
* file.c (rb_str_normalize_ospath):
HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which
U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through
U+2FAFF are not decomposed (this avoids problems with round trip
conversions from old Mac text encodings).
http://developer.apple.com/library/mac/qa/qa1173/_index.html
Therefore fix r42457 to exclude these ranges.
Sun Aug 11 03:26:07 2013 Tanaka Akira <akr@fsij.org>
* bignum.c (bitsize): Fix a conditional expression.

4
dir.c
Просмотреть файл

@ -84,8 +84,6 @@ char *strchr(char*,char);
#include <sys/param.h>
#include <sys/mount.h>
VALUE rb_str_normalize_ospath(const char *ptr, long len);
static inline int
is_hfs(DIR *dirp)
{
@ -1420,7 +1418,7 @@ glob_helper(
name = dp->d_name;
namlen = NAMLEN(dp);
# if HAVE_HFS
if (hfs_p && has_nonascii(name, namlen)) {
if (0&&hfs_p && has_nonascii(name, namlen)) {
if (!NIL_P(utf8str = rb_str_normalize_ospath(name, namlen))) {
RSTRING_GETMEM(utf8str, name, namlen);
}

Просмотреть файл

@ -2,3 +2,4 @@ $(OBJS): $(HDRS) $(ruby_headers) \
$(hdrdir)/ruby/encoding.h \
$(hdrdir)/ruby/oniguruma.h
qsort.o: $(hdrdir)/ruby/util.h
normalize.o: $(top_srcdir)/internal.h

Просмотреть файл

@ -1,3 +1,4 @@
# Add -I flags for $(topdir) and $(top_srcdir) to the include flags.
$INCFLAGS << " -I$(topdir) -I$(top_srcdir)"
# Collect every file in $srcdir whose extension is one of SRC_EXT.
$srcs = Dir[File.join($srcdir, "*.{#{SRC_EXT.join(%q{,})}}")]
# One entry per source file: its base name with the extension removed.
inits = $srcs.map {|s| File.basename(s, ".*")}
# "init" is removed from that list of names.
inits.delete("init")

Просмотреть файл

@ -0,0 +1,18 @@
#include "ruby.h"
#include "internal.h"
#ifdef __APPLE__
/*
 * Method body for "normalize_ospath": hands the receiver's byte buffer
 * and byte length to rb_str_normalize_ospath() and returns its result.
 */
static VALUE
normalize_ospath(VALUE str)
{
    const char *bytes = RSTRING_PTR(str);
    long nbytes = RSTRING_LEN(str);

    return rb_str_normalize_ospath(bytes, nbytes);
}
#else
#define normalize_ospath rb_f_notimplement
#endif
/*
 * Registers the zero-argument method "normalize_ospath" on klass.
 * When __APPLE__ is not defined, normalize_ospath is a macro for
 * rb_f_notimplement, so that function is what gets registered.
 */
void
Init_normalize(VALUE klass)
{
rb_define_method(klass, "normalize_ospath", normalize_ospath, 0);
}

43
file.c
Просмотреть файл

@ -245,7 +245,7 @@ rb_str_encode_ospath(VALUE path)
#ifdef __APPLE__
VALUE
rb_str_normalize_ospath(const char *ptr, long len)
rb_str_normalize_ospath0(const char *ptr, long len)
{
VALUE str;
CFIndex buflen = 0;
@ -267,6 +267,47 @@ rb_str_normalize_ospath(const char *ptr, long len)
CFRelease(s);
return str;
}
/*
 * Normalize the UTF-8 byte sequence ptr[0, len) for use as an OS path and
 * return the result as a new UTF-8 string.
 *
 * Characters in U+2000..U+2FFF, U+F900..U+FAFF and U+2F800..U+2FAFF are
 * copied through untouched, because HFS Plus does not decompose them.
 * Every run of bytes between such characters is passed to
 * rb_str_normalize_ospath0() and the pieces are concatenated in order.
 *
 * A byte that does not start a valid character is replaced by U+FFFD
 * (EF BF BD) and scanning resumes at the following byte.
 */
VALUE
rb_str_normalize_ospath(const char *ptr, long len)
{
    const char *p = ptr;
    const char *e = ptr + len;
    const char *p1 = p;		/* start of the pending, not yet normalized run */
    VALUE str = rb_str_buf_new(len);
    rb_encoding *enc = rb_utf8_encoding();
    rb_enc_associate(str, enc);

    while (p < e) {
	int l;
	unsigned int c;
	int r = rb_enc_precise_mbclen(p, e, enc);
	if (!MBCLEN_CHARFOUND_P(r)) {
	    /* invalid byte shall not happen but */
	    if (p - p1 > 0) {
		rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
	    }
	    rb_str_cat2(str, "\xEF\xBF\xBD");
	    p += 1;
	    /*
	     * Drop the bad byte from the pending run and restart the loop:
	     * r carries no usable length, and the byte has already been
	     * replaced, so it must not reach the normalizer again.
	     */
	    p1 = p;
	    continue;
	}
	l = MBCLEN_CHARFOUND_LEN(r);
	c = rb_enc_mbc_to_codepoint(p, e, enc);
	if ((0x2000 <= c && c <= 0x2FFF) || (0xF900 <= c && c <= 0xFAFF) ||
	    (0x2F800 <= c && c <= 0x2FAFF)) {
	    /* flush what precedes the excluded character, then copy it raw */
	    if (p - p1 > 0) {
		rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
	    }
	    rb_str_cat(str, p, l);
	    p += l;
	    p1 = p;
	}
	else {
	    p += l;
	}
    }
    /* flush the trailing run, if any */
    if (p - p1 > 0) {
	rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
    }

    return str;
}
#endif
static long

Просмотреть файл

@ -513,6 +513,11 @@ VALUE rb_big_mul_karatsuba(VALUE x, VALUE y);
VALUE rb_big_mul_toom3(VALUE x, VALUE y);
VALUE rb_big_sq_fast(VALUE x);
/* file.c */
#ifdef __APPLE__
VALUE rb_str_normalize_ospath(const char *ptr, long len);
#endif
/* io.c */
void rb_maygvl_fd_fix_cloexec(int fd);

Просмотреть файл

@ -0,0 +1,105 @@
require 'test/unit'
require "-test-/string/string"
require "tempfile"
# Tests for Bug::String#normalize_ospath.
#
# Each test returns quietly when the method raises NotImplementedError.
class Test_StringNormalize < Test::Unit::TestCase
=begin
  def test_normalize_all
    exclude = [
      #0x340, 0x341, 0x343, 0x344
    ]
    (0x0080..0xFFFD).each do |n|
      next if 0xD800 <= n && n <= 0xDFFF
      next if exclude.include? n
      code = n.to_s(16)
      Tempfile.create("#{code}-#{n.chr(Encoding::UTF_8)}-") do |tempfile|
        ary = Dir.glob(File.expand_path("../#{code}-*", tempfile.path))
        assert_equal 1, ary.size
        result = ary[0]
        rn = result[/\/\h+-(.+?)-/, 1]
        #assert_equal tempfile.path, result, "#{rn.dump} is not U+#{n.to_s(16)}"
        r2 = Bug::String.new(result ).normalize_ospath
        rn2 = r2[/\/\h+-(.+?)-/, 1]
        if tempfile.path == result
          if tempfile.path == r2
          else
            puts "U+#{n.to_s(16)} shouldn't be r2#{rn2.dump}"
          end
        else
          if tempfile.path == r2
            # puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump}"
          elsif result == r2
            puts "U+#{n.to_s(16)} shouldn't be #{rn.dump}"
          else
            puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump} r2#{rn2.dump}"
          end
        end
      end
    end
  end
=end

  # The table holds whitespace-separated "expected source" pairs; each
  # source must normalize to the expected string.
  def test_normalize
    table = %[
      \u304C \u304B\u3099
      \u3077 \u3075\u309A
      \u308F\u3099 \u308F\u3099
      \u30F4 \u30A6\u3099
      \u30DD \u30DB\u309A
      \u30AB\u303A \u30AB\u303A
      \u00C1 A\u0301
      B\u030A B\u030A
      \u0386 \u0391\u0301
      \u03D3 \u03D2\u0301
      \u0401 \u0415\u0308
      \u2260 =\u0338
    ]
    table.scan(/(\S+)\s+(\S+)/).each do |expected, src|
      actual = ospath(src)
      assert_equal expected, actual,
                   "#{expected.dump} is expected but #{src.dump}"
    end
  rescue NotImplementedError
  end

  # Every listed string must be returned unchanged.
  def test_not_normalize_kc
    samples = %[
      \u2460
      \u2162
      \u3349
      \u33A1
      \u337B
      \u2116
      \u33CD
      \u2121
      \u32A4
      \u3231
    ]
    assert_left_as_is(samples.split)
  rescue NotImplementedError
  end

  # Every whitespace-separated string listed here must be returned unchanged.
  def test_dont_normalize_hfsplus
    samples = %[
      \u2190\u0338
      \u219A
      \u212B
      \uF90A
      \uF9F4
      \uF961 \uF9DB
      \uF96F \uF3AA
      \uF915 \uF95C \uF9BF
      \uFA0C
      \uFA10
      \uFA19
      \uFA26
    ]
    assert_left_as_is(samples.split)
  rescue NotImplementedError
  end

  private

  # Runs normalize_ospath on a Bug::String built from +src+.
  def ospath(src)
    Bug::String.new(src).normalize_ospath
  end

  # Asserts that each string in +sources+ equals its own normalization.
  def assert_left_as_is(sources)
    sources.each do |src|
      result = ospath(src)
      assert_equal src, result,
                   "#{src.dump} is expected not to be normalized, but #{result.dump}"
    end
  end
end