зеркало из https://github.com/github/ruby.git
* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store @encoding
a String which means the name of the encoding. This partially reverts r29646. * lib/rexml/document.rb: follow above. * lib/rexml/output.rb: ditto. * lib/rexml/parsers/baseparser.rb: ditto. * lib/rexml/source.rb: ditto. * lib/rexml/xmldecl.rb: ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@31008 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
cddcffb8f9
Коммит
f25ff846f6
16
ChangeLog
16
ChangeLog
|
@ -1,3 +1,19 @@
|
|||
Thu Mar 3 00:36:29 2011 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store @encoding
|
||||
a String which means the name of the encoding.
|
||||
this partially reverts r29646.
|
||||
|
||||
* lib/rexml/document.rb: follow above.
|
||||
|
||||
* lib/rexml/output.rb: ditto.
|
||||
|
||||
* lib/rexml/parsers/baseparser.rb: ditto.
|
||||
|
||||
* lib/rexml/source.rb: ditto.
|
||||
|
||||
* lib/rexml/xmldecl.rb: ditto.
|
||||
|
||||
Wed Mar 2 23:19:56 2011 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* string.c (str_byte_substr): return nil for negative length.
|
||||
|
|
|
@ -184,7 +184,7 @@ module REXML
|
|||
# that IE's limited abilities can handle. This hack inserts a space
|
||||
# before the /> on empty tags. Defaults to false
|
||||
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
|
||||
if xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output)
|
||||
if xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
|
||||
output = Output.new( output, xml_decl.encoding )
|
||||
end
|
||||
formatter = if indent > -1
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
module REXML
|
||||
module Encoding
|
||||
# ID ---> Encoding object
|
||||
# ID ---> Encoding name
|
||||
attr_reader :encoding
|
||||
def encoding=(encoding)
|
||||
encoding = encoding.name if encoding.is_a?(Encoding)
|
||||
if encoding.is_a?(String)
|
||||
original_encoding = encoding
|
||||
encoding = find_encoding(encoding)
|
||||
|
@ -11,35 +12,25 @@ module REXML
|
|||
end
|
||||
end
|
||||
return false if defined?(@encoding) and encoding == @encoding
|
||||
if encoding and encoding != ::Encoding::UTF_8
|
||||
@encoding = encoding
|
||||
if encoding
|
||||
@encoding = encoding.upcase
|
||||
else
|
||||
@encoding = ::Encoding::UTF_8
|
||||
@encoding = 'UTF-8'
|
||||
end
|
||||
true
|
||||
end
|
||||
|
||||
def check_encoding(xml)
|
||||
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
|
||||
# We have to recognize UTF-16BE, UTF-16LE, and UTF-8
|
||||
if xml[0, 2] == "\xfe\xff"
|
||||
xml[0, 2] = ""
|
||||
::Encoding::UTF_16BE
|
||||
return 'UTF-16BE'
|
||||
elsif xml[0, 2] == "\xff\xfe"
|
||||
xml[0, 2] = ""
|
||||
::Encoding::UTF_16LE
|
||||
else
|
||||
if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1
|
||||
\s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml
|
||||
encoding_name = $3
|
||||
if /\Autf-16\z/i =~ encoding_name
|
||||
::Encoding::UTF_16BE
|
||||
else
|
||||
find_encoding(encoding_name)
|
||||
end
|
||||
else
|
||||
::Encoding::UTF_8
|
||||
end
|
||||
return 'UTF-16LE'
|
||||
end
|
||||
xml =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
|
||||
return $3 ? $3.upcase : 'UTF-8'
|
||||
end
|
||||
|
||||
def encode(string)
|
||||
|
@ -53,14 +44,19 @@ module REXML
|
|||
private
|
||||
def find_encoding(name)
|
||||
case name
|
||||
when "UTF-16"
|
||||
name = "UTF-16BE"
|
||||
when /\Ashift-jis\z/i
|
||||
name = "Shift_JIS"
|
||||
return "SHIFT_JIS"
|
||||
when /\ACP-(\d+)\z/
|
||||
name = "CP#{$1}"
|
||||
when /\AUTF-8\z/i
|
||||
return name
|
||||
end
|
||||
::Encoding.find(name)
|
||||
begin
|
||||
::Encoding::Converter.search_convpath(name, 'UTF-8')
|
||||
rescue ::Encoding::ConverterNotFoundError
|
||||
return nil
|
||||
end
|
||||
name
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -22,7 +22,7 @@ module REXML
|
|||
case node
|
||||
|
||||
when Document
|
||||
if node.xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output)
|
||||
if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
|
||||
output = Output.new( output, node.xml_decl.encoding )
|
||||
end
|
||||
write_document( node, output )
|
||||
|
|
|
@ -10,7 +10,7 @@ module REXML
|
|||
@output = real_IO
|
||||
self.encoding = encd
|
||||
|
||||
@to_utf = (@encoding != ::Encoding::UTF_8)
|
||||
@to_utf = encd != 'UTF-8'
|
||||
end
|
||||
|
||||
def <<( content )
|
||||
|
|
|
@ -248,7 +248,7 @@ module REXML
|
|||
@document_status = :after_doctype
|
||||
@source.read if @source.buffer.size<2
|
||||
md = @source.match(/\s*/um, true)
|
||||
if @source.encoding == ::Encoding::UTF_8
|
||||
if @source.encoding == "UTF-8"
|
||||
@source.buffer.force_encoding(::Encoding::UTF_8)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -54,16 +54,14 @@ module REXML
|
|||
def encoding=(enc)
|
||||
return unless super
|
||||
@line_break = encode( '>' )
|
||||
if @encoding != ::Encoding::UTF_8
|
||||
if @encoding != 'UTF-8'
|
||||
@buffer = decode(@buffer)
|
||||
@to_utf = true
|
||||
else
|
||||
@to_utf = false
|
||||
if @buffer.respond_to? :force_encoding
|
||||
@buffer.force_encoding ::Encoding::UTF_8
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Scans the source for a given pattern. Note, that this is not your
|
||||
# usual scan() method. For one thing, the pattern argument has some
|
||||
|
|
|
@ -109,20 +109,9 @@ module REXML
|
|||
end
|
||||
|
||||
private
|
||||
def normalized_encoding_name(_encoding)
|
||||
if _encoding == ::Encoding::UTF_16BE
|
||||
"UTF-16"
|
||||
else
|
||||
return _encoding.name
|
||||
end
|
||||
end
|
||||
|
||||
def content(enc)
|
||||
rv = "version='#@version'"
|
||||
if @writeencoding || enc.to_s !~ /\Autf-8\z/i
|
||||
encoding_name = normalized_encoding_name(enc)
|
||||
rv << " encoding='#{encoding_name}'"
|
||||
end
|
||||
rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
|
||||
rv << " standalone='#@standalone'" if @standalone
|
||||
rv
|
||||
end
|
||||
|
|
|
@ -241,7 +241,7 @@ DELIMITER
|
|||
end
|
||||
|
||||
doc = REXML::Document.new(source_iso)
|
||||
assert_equal('ISO-8859-1', doc.xml_decl.encoding.to_s)
|
||||
assert_equal('ISO-8859-1', doc.xml_decl.encoding)
|
||||
assert_equal(koln_utf, doc.root.text)
|
||||
doc.write(out="")
|
||||
assert_equal(source_iso, out )
|
||||
|
@ -255,23 +255,21 @@ DELIMITER
|
|||
<position><aktuell datum="01-10-11">Technik</aktuell></position>
|
||||
<hauptspalte>
|
||||
<headline>Technik</headline>
|
||||
Die Technik ist das Rückgrat der meisten Geschäftsprozesse bei Home of the Brave. Deshalb sollen hier alle relevanten technischen Abläufe, Daten und Einrichtungen beschrieben werden, damit jeder im Bedarfsfall die nötigen Informationen, Anweisungen und Verhaltensempfehlungen nachlesen und/oder abrufen kann.
|
||||
Die Technik ist das R\xFCckgrat der meisten Gesch\xFCftsprozesse bei Home of the Brave. Deshalb sollen hier alle relevanten technischen Abl\xFCufe, Daten und Einrichtungen beschrieben werden, damit jeder im Bedarfsfall die n\xFCtigen Informationen, Anweisungen und Verhaltensempfehlungen nachlesen und/oder abrufen kann.
|
||||
</hauptspalte>
|
||||
<nebenspalte>
|
||||
<link ziel="Flash/">Flash</link><umbruch/>
|
||||
Nützliches von Flashern für Flasher.<umbruch/>
|
||||
N\xFCtzliches von Flashern f\xFCr Flasher.<umbruch/>
|
||||
<link neu="ja" ziel="Cvs/">CVS-FAQ</link><umbruch/>
|
||||
FAQ zur Benutzung von CVS bei HOB
|
||||
</nebenspalte>
|
||||
</intranet>
|
||||
EOF
|
||||
tn = XPath.first(doc, "//nebenspalte/text()[2]")
|
||||
expected_iso = "Nützliches von Flashern für Flasher."
|
||||
expected_iso = "N\xFCtzliches von Flashern f\xFCr Flasher."
|
||||
expected_utf = expected_iso.unpack('C*').pack('U*')
|
||||
if expected_utf.respond_to? :encode
|
||||
expected_iso.force_encoding("iso-8859-1")
|
||||
expected_iso.force_encoding(::Encoding::ISO_8859_1)
|
||||
expected_utf.force_encoding(::Encoding::UTF_8)
|
||||
end
|
||||
assert_equal(expected_utf, tn.to_s.strip)
|
||||
f = REXML::Formatters::Default.new
|
||||
f.write( tn, Output.new(o = "", "ISO-8859-1") )
|
||||
|
|
|
@ -230,34 +230,12 @@ class Tester < Test::Unit::TestCase
|
|||
doc = Document.new(docin)
|
||||
doc.write(test="")
|
||||
assert_equal(31, doc.doctype.size)
|
||||
|
||||
# Here's a little ditty from Tobias...
|
||||
src = <<-EOL
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
|
||||
"http://www.w3.org/TR/SVG/DTD/svg10.dtd"
|
||||
[
|
||||
<!-- <!ENTITY % fast-slow "0 0 .5 1">-->
|
||||
<!--<!ENTITY % slow-fast ".5 0 1 1">-->
|
||||
<!ENTITY hover_ani
|
||||
'<animateTransform attributeName="transform"
|
||||
type="scale" restart="whenNotActive" values="1;0.96"
|
||||
dur="0.5s" calcMode="spline" keySplines="0 0 .5 1"
|
||||
fill="freeze" begin="mouseover"/>
|
||||
<animateTransform attributeName="transform"
|
||||
type="scale" restart="whenNotActive" values="0.96;1"
|
||||
dur="0.5s" calcMode="spline" keySplines=".5 0 1 1"
|
||||
fill="freeze" begin="mouseover+0.5s"/>'
|
||||
>
|
||||
]
|
||||
> <a/>
|
||||
EOL
|
||||
end
|
||||
|
||||
def test_document
|
||||
# Testing cloning
|
||||
source = "<element/>"
|
||||
doc = Document.new source
|
||||
doc2 = Document.new doc
|
||||
|
||||
# Testing Root
|
||||
assert_equal doc.root.name.to_s, "element"
|
||||
|
@ -642,11 +620,10 @@ class Tester < Test::Unit::TestCase
|
|||
end
|
||||
|
||||
def test_line
|
||||
doc = Document.new File.new(fixture_path("bad.xml"))
|
||||
Document.new File.new(fixture_path("bad.xml"))
|
||||
assert_fail "There should have been an error"
|
||||
rescue Exception
|
||||
# We should get here
|
||||
er = $!
|
||||
assert($!.line == 5, "Should have been an error on line 5, "+
|
||||
"but was reported as being on line #{$!.line}" )
|
||||
end
|
||||
|
@ -664,13 +641,11 @@ class Tester < Test::Unit::TestCase
|
|||
def test_exception
|
||||
source = SourceFactory.create_from "<a/>"
|
||||
p = ParseException.new( "dummy message", source )
|
||||
s = p.to_s
|
||||
begin
|
||||
raise "dummy"
|
||||
rescue Exception
|
||||
p.continued_exception = $!
|
||||
end
|
||||
s = p.to_s
|
||||
end
|
||||
|
||||
def test_bad_content
|
||||
|
@ -682,7 +657,7 @@ class Tester < Test::Unit::TestCase
|
|||
assert_equal "content>content", tree_gt.elements[1].text
|
||||
# This isn't
|
||||
begin
|
||||
tree_lt = Document.new in_lt
|
||||
Document.new in_lt
|
||||
assert_fail "Should have gotten a parse error"
|
||||
rescue ParseException
|
||||
end
|
||||
|
@ -856,8 +831,6 @@ EOL
|
|||
def test_attlist_write
|
||||
file=File.new(fixture_path("foo.xml"))
|
||||
doc=Document.new file
|
||||
root = doc.root
|
||||
|
||||
out = ''
|
||||
doc.write(out)
|
||||
end
|
||||
|
@ -865,7 +838,7 @@ EOL
|
|||
def test_more_namespaces
|
||||
assert_raise( REXML::UndefinedNamespaceException,
|
||||
%Q{Should have gotten an Undefined Namespace error} ) {
|
||||
doc1 = Document.new("<r><p><n:c/></p></r>")
|
||||
Document.new("<r><p><n:c/></p></r>")
|
||||
}
|
||||
doc2 = Document.new("<r xmlns:n='1'><p><n:c/></p></r>")
|
||||
es = XPath.match(doc2, '//c')
|
||||
|
@ -916,7 +889,7 @@ EOL
|
|||
end
|
||||
|
||||
def test_oses_with_bad_EOLs
|
||||
d = Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n")
|
||||
Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n")
|
||||
end
|
||||
|
||||
# Contributed (with patch to fix bug) by Kouhei
|
||||
|
@ -1024,7 +997,6 @@ EOL
|
|||
document.write(s)
|
||||
end
|
||||
|
||||
|
||||
def test_write_cdata
|
||||
src = "<a>A</a>"
|
||||
doc = REXML::Document.new( src )
|
||||
|
@ -1045,7 +1017,7 @@ EOL
|
|||
<x:b x:n="foo"/>
|
||||
</a>
|
||||
EOL
|
||||
d = REXML::Document.new( source )
|
||||
d = Document.new( source )
|
||||
assert_equal( 'foo', REXML::XPath.first(d.root, "//x:b/@x:n").value )
|
||||
assert_equal( nil, REXML::XPath.first(d.root, "//x:b/@x:n", {}))
|
||||
end
|
||||
|
@ -1233,17 +1205,17 @@ EOL
|
|||
def test_ticket_21
|
||||
src = "<foo bar=value/>"
|
||||
assert_raise( ParseException, "invalid XML should be caught" ) {
|
||||
d = REXML::Document.new(src)
|
||||
Document.new(src)
|
||||
}
|
||||
begin
|
||||
d = REXML::Document.new(src)
|
||||
Document.new(src)
|
||||
rescue
|
||||
assert_match( /missing attribute quote/, $!.message )
|
||||
end
|
||||
end
|
||||
|
||||
def test_ticket_63
|
||||
d = REXML::Document.new(File.new(fixture_path("t63-1.xml")))
|
||||
Document.new(File.new(fixture_path("t63-1.xml")))
|
||||
end
|
||||
|
||||
def test_ticket_75
|
||||
|
@ -1275,9 +1247,9 @@ EOL
|
|||
|
||||
def test_ticket_88
|
||||
doc = REXML::Document.new("<?xml version=\"1.0\" encoding=\"shift_jis\"?>")
|
||||
assert_equal("<?xml version='1.0' encoding='Shift_JIS'?>", doc.to_s)
|
||||
assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
|
||||
doc = REXML::Document.new("<?xml version = \"1.0\" encoding = \"shift_jis\"?>")
|
||||
assert_equal("<?xml version='1.0' encoding='Shift_JIS'?>", doc.to_s)
|
||||
assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
|
||||
end
|
||||
|
||||
def test_ticket_85
|
||||
|
@ -1295,8 +1267,6 @@ ENDXML
|
|||
</bar>
|
||||
</foo>"
|
||||
|
||||
zml = "<foo><bar><bob name='jimmy'/></bar></foo>"
|
||||
|
||||
# The pretty printer ignores all whitespace, anyway so output1 == output2
|
||||
f = REXML::Formatters::Pretty.new( 2 )
|
||||
d = Document.new( xml, :ignore_whitespace_nodes=>:all )
|
||||
|
@ -1358,7 +1328,7 @@ ENDXML
|
|||
# Per .2.5 Node Tests of XPath spec
|
||||
assert_raise( REXML::UndefinedNamespaceException,
|
||||
%Q{Should have gotten an Undefined Namespace error} ) {
|
||||
d = Document.new("<a><n:b/></a>")
|
||||
Document.new("<a><n:b/></a>")
|
||||
}
|
||||
end
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ class EncodingTester < Test::Unit::TestCase
|
|||
def test_encoded_in_encoded_out
|
||||
doc = Document.new( @encoded )
|
||||
doc.write( out="" )
|
||||
out.force_encoding('binary') if out.respond_to? :force_encoding
|
||||
out.force_encoding(::Encoding::ASCII_8BIT)
|
||||
assert_equal( @encoded, out )
|
||||
end
|
||||
|
||||
|
@ -26,12 +26,12 @@ class EncodingTester < Test::Unit::TestCase
|
|||
def test_encoded_in_change_out
|
||||
doc = Document.new( @encoded )
|
||||
doc.xml_decl.encoding = "UTF-8"
|
||||
assert_equal( ::Encoding::UTF_8, doc.encoding )
|
||||
assert_equal("UTF-8", doc.encoding)
|
||||
REXML::Formatters::Default.new.write( doc.root, out="" )
|
||||
out.force_encoding('binary') if out.respond_to? :force_encoding
|
||||
out.force_encoding(::Encoding::ASCII_8BIT)
|
||||
assert_equal( @not_encoded, out )
|
||||
char = XPath.first( doc, "/a/b/text()" ).to_s
|
||||
char.force_encoding('binary') if char.respond_to? :force_encoding
|
||||
char.force_encoding(::Encoding::ASCII_8BIT)
|
||||
assert_equal( "ĉ", char )
|
||||
end
|
||||
|
||||
|
@ -39,7 +39,7 @@ class EncodingTester < Test::Unit::TestCase
|
|||
def test_encoded_in_different_out
|
||||
doc = Document.new( @encoded )
|
||||
REXML::Formatters::Default.new.write( doc.root, Output.new( out="", "UTF-8" ) )
|
||||
out.force_encoding('binary') if out.respond_to? :force_encoding
|
||||
out.force_encoding(::Encoding::ASCII_8BIT)
|
||||
assert_equal( @not_encoded, out )
|
||||
end
|
||||
|
||||
|
@ -47,9 +47,9 @@ class EncodingTester < Test::Unit::TestCase
|
|||
def test_in_change_out
|
||||
doc = Document.new( @not_encoded )
|
||||
doc.xml_decl.encoding = "ISO-8859-3"
|
||||
assert_equal( ::Encoding::ISO_8859_3, doc.encoding )
|
||||
assert_equal("ISO-8859-3", doc.encoding)
|
||||
doc.write( out="" )
|
||||
out.force_encoding('binary') if out.respond_to? :force_encoding
|
||||
out.force_encoding(::Encoding::ASCII_8BIT)
|
||||
assert_equal( @encoded, out )
|
||||
end
|
||||
|
||||
|
@ -57,7 +57,7 @@ class EncodingTester < Test::Unit::TestCase
|
|||
def test_in_different_out
|
||||
doc = Document.new( @not_encoded )
|
||||
doc.write( Output.new( out="", "ISO-8859-3" ) )
|
||||
out.force_encoding('binary') if out.respond_to? :force_encoding
|
||||
out.force_encoding(::Encoding::ASCII_8BIT)
|
||||
assert_equal( @encoded, out )
|
||||
end
|
||||
|
||||
|
@ -66,10 +66,10 @@ class EncodingTester < Test::Unit::TestCase
|
|||
def test_in_different_access
|
||||
doc = Document.new <<-EOL
|
||||
<?xml version='1.0' encoding='ISO-8859-1'?>
|
||||
<a a="ÿ">ÿ</a>
|
||||
<a a="\xFF">\xFF</a>
|
||||
EOL
|
||||
expect = "\303\277"
|
||||
expect.force_encoding('UTF-8') if expect.respond_to? :force_encoding
|
||||
expect.force_encoding(::Encoding::UTF_8)
|
||||
assert_equal( expect, doc.elements['a'].attributes['a'] )
|
||||
assert_equal( expect, doc.elements['a'].text )
|
||||
end
|
||||
|
@ -86,7 +86,7 @@ class EncodingTester < Test::Unit::TestCase
|
|||
|
||||
def test_ticket_110
|
||||
utf16 = REXML::Document.new(File.new(fixture_path("ticket_110_utf16.xml")))
|
||||
assert_equal( ::Encoding::UTF_16BE, utf16.encoding )
|
||||
assert_equal(utf16.encoding, "UTF-16")
|
||||
assert( utf16[0].kind_of?(REXML::XMLDecl))
|
||||
end
|
||||
end
|
||||
|
|
Загрузка…
Ссылка в новой задаче