* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store in @encoding

a String that names the encoding.
  This partially reverts r29646.

* lib/rexml/document.rb: follow above.

* lib/rexml/output.rb: ditto.

* lib/rexml/parsers/baseparser.rb: ditto.

* lib/rexml/source.rb: ditto.

* lib/rexml/xmldecl.rb: ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@31008 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2011-03-02 15:36:48 +00:00
Родитель cddcffb8f9
Коммит f25ff846f6
11 изменённых файлов: 114 добавлений и 147 удалений

Просмотреть файл

@ -1,3 +1,19 @@
Thu Mar 3 00:36:29 2011 NARUSE, Yui <naruse@ruby-lang.org>
* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store in @encoding
a String that names the encoding.
This partially reverts r29646.
* lib/rexml/document.rb: follow above.
* lib/rexml/output.rb: ditto.
* lib/rexml/parsers/baseparser.rb: ditto.
* lib/rexml/source.rb: ditto.
* lib/rexml/xmldecl.rb: ditto.
Wed Mar 2 23:19:56 2011 Nobuyoshi Nakada <nobu@ruby-lang.org>
* string.c (str_byte_substr): return nil for negative length.

Просмотреть файл

@ -184,7 +184,7 @@ module REXML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
if xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output)
if xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
output = Output.new( output, xml_decl.encoding )
end
formatter = if indent > -1

Просмотреть файл

@ -1,8 +1,9 @@
module REXML
module Encoding
# ID ---> Encoding object
# ID ---> Encoding name
attr_reader :encoding
def encoding=(encoding)
encoding = encoding.name if encoding.is_a?(Encoding)
if encoding.is_a?(String)
original_encoding = encoding
encoding = find_encoding(encoding)
@ -11,35 +12,25 @@ module REXML
end
end
return false if defined?(@encoding) and encoding == @encoding
if encoding and encoding != ::Encoding::UTF_8
@encoding = encoding
if encoding
@encoding = encoding.upcase
else
@encoding = ::Encoding::UTF_8
@encoding = 'UTF-8'
end
true
end
def check_encoding(xml)
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
# We have to recognize UTF-16BE, UTF-16LE, and UTF-8
if xml[0, 2] == "\xfe\xff"
xml[0, 2] = ""
::Encoding::UTF_16BE
return 'UTF-16BE'
elsif xml[0, 2] == "\xff\xfe"
xml[0, 2] = ""
::Encoding::UTF_16LE
else
if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1
\s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml
encoding_name = $3
if /\Autf-16\z/i =~ encoding_name
::Encoding::UTF_16BE
else
find_encoding(encoding_name)
end
else
::Encoding::UTF_8
end
return 'UTF-16LE'
end
xml =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
return $3 ? $3.upcase : 'UTF-8'
end
def encode(string)
@ -53,14 +44,19 @@ module REXML
private
def find_encoding(name)
case name
when "UTF-16"
name = "UTF-16BE"
when /\Ashift-jis\z/i
name = "Shift_JIS"
return "SHIFT_JIS"
when /\ACP-(\d+)\z/
name = "CP#{$1}"
when /\AUTF-8\z/i
return name
end
::Encoding.find(name)
begin
::Encoding::Converter.search_convpath(name, 'UTF-8')
rescue ::Encoding::ConverterNotFoundError
return nil
end
name
end
end
end

Просмотреть файл

@ -22,7 +22,7 @@ module REXML
case node
when Document
if node.xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output)
if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
output = Output.new( output, node.xml_decl.encoding )
end
write_document( node, output )

Просмотреть файл

@ -10,7 +10,7 @@ module REXML
@output = real_IO
self.encoding = encd
@to_utf = (@encoding != ::Encoding::UTF_8)
@to_utf = encd != 'UTF-8'
end
def <<( content )

Просмотреть файл

@ -248,7 +248,7 @@ module REXML
@document_status = :after_doctype
@source.read if @source.buffer.size<2
md = @source.match(/\s*/um, true)
if @source.encoding == ::Encoding::UTF_8
if @source.encoding == "UTF-8"
@source.buffer.force_encoding(::Encoding::UTF_8)
end
end

Просмотреть файл

@ -54,16 +54,14 @@ module REXML
def encoding=(enc)
return unless super
@line_break = encode( '>' )
if @encoding != ::Encoding::UTF_8
if @encoding != 'UTF-8'
@buffer = decode(@buffer)
@to_utf = true
else
@to_utf = false
if @buffer.respond_to? :force_encoding
@buffer.force_encoding ::Encoding::UTF_8
end
end
end
# Scans the source for a given pattern. Note, that this is not your
# usual scan() method. For one thing, the pattern argument has some

Просмотреть файл

@ -109,20 +109,9 @@ module REXML
end
private
def normalized_encoding_name(_encoding)
if _encoding == ::Encoding::UTF_16BE
"UTF-16"
else
return _encoding.name
end
end
def content(enc)
rv = "version='#@version'"
if @writeencoding || enc.to_s !~ /\Autf-8\z/i
encoding_name = normalized_encoding_name(enc)
rv << " encoding='#{encoding_name}'"
end
rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
rv << " standalone='#@standalone'" if @standalone
rv
end

Просмотреть файл

@ -241,7 +241,7 @@ DELIMITER
end
doc = REXML::Document.new(source_iso)
assert_equal('ISO-8859-1', doc.xml_decl.encoding.to_s)
assert_equal('ISO-8859-1', doc.xml_decl.encoding)
assert_equal(koln_utf, doc.root.text)
doc.write(out="")
assert_equal(source_iso, out )
@ -255,23 +255,21 @@ DELIMITER
<position><aktuell datum="01-10-11">Technik</aktuell></position>
<hauptspalte>
<headline>Technik</headline>
Die Technik ist das Rückgrat der meisten Geschäftsprozesse bei Home of the Brave. Deshalb sollen hier alle relevanten technischen Abläufe, Daten und Einrichtungen beschrieben werden, damit jeder im Bedarfsfall die nötigen Informationen, Anweisungen und Verhaltensempfehlungen nachlesen und/oder abrufen kann.
Die Technik ist das R\xFCckgrat der meisten Gesch\xFCftsprozesse bei Home of the Brave. Deshalb sollen hier alle relevanten technischen Abl\xFCufe, Daten und Einrichtungen beschrieben werden, damit jeder im Bedarfsfall die n\xFCtigen Informationen, Anweisungen und Verhaltensempfehlungen nachlesen und/oder abrufen kann.
</hauptspalte>
<nebenspalte>
<link ziel="Flash/">Flash</link><umbruch/>
Nützliches von Flashern für Flasher.<umbruch/>
N\xFCtzliches von Flashern f\xFCr Flasher.<umbruch/>
<link neu="ja" ziel="Cvs/">CVS-FAQ</link><umbruch/>
FAQ zur Benutzung von CVS bei HOB
</nebenspalte>
</intranet>
EOF
tn = XPath.first(doc, "//nebenspalte/text()[2]")
expected_iso = "Nützliches von Flashern für Flasher."
expected_iso = "N\xFCtzliches von Flashern f\xFCr Flasher."
expected_utf = expected_iso.unpack('C*').pack('U*')
if expected_utf.respond_to? :encode
expected_iso.force_encoding("iso-8859-1")
expected_iso.force_encoding(::Encoding::ISO_8859_1)
expected_utf.force_encoding(::Encoding::UTF_8)
end
assert_equal(expected_utf, tn.to_s.strip)
f = REXML::Formatters::Default.new
f.write( tn, Output.new(o = "", "ISO-8859-1") )

Просмотреть файл

@ -230,34 +230,12 @@ class Tester < Test::Unit::TestCase
doc = Document.new(docin)
doc.write(test="")
assert_equal(31, doc.doctype.size)
# Here's a little ditty from Tobias...
src = <<-EOL
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
"http://www.w3.org/TR/SVG/DTD/svg10.dtd"
[
<!-- <!ENTITY % fast-slow "0 0 .5 1">-->
<!--<!ENTITY % slow-fast ".5 0 1 1">-->
<!ENTITY hover_ani
'<animateTransform attributeName="transform"
type="scale" restart="whenNotActive" values="1;0.96"
dur="0.5s" calcMode="spline" keySplines="0 0 .5 1"
fill="freeze" begin="mouseover"/>
<animateTransform attributeName="transform"
type="scale" restart="whenNotActive" values="0.96;1"
dur="0.5s" calcMode="spline" keySplines=".5 0 1 1"
fill="freeze" begin="mouseover+0.5s"/>'
>
]
> <a/>
EOL
end
def test_document
# Testing cloning
source = "<element/>"
doc = Document.new source
doc2 = Document.new doc
# Testing Root
assert_equal doc.root.name.to_s, "element"
@ -642,11 +620,10 @@ class Tester < Test::Unit::TestCase
end
def test_line
doc = Document.new File.new(fixture_path("bad.xml"))
Document.new File.new(fixture_path("bad.xml"))
assert_fail "There should have been an error"
rescue Exception
# We should get here
er = $!
assert($!.line == 5, "Should have been an error on line 5, "+
"but was reported as being on line #{$!.line}" )
end
@ -664,13 +641,11 @@ class Tester < Test::Unit::TestCase
def test_exception
source = SourceFactory.create_from "<a/>"
p = ParseException.new( "dummy message", source )
s = p.to_s
begin
raise "dummy"
rescue Exception
p.continued_exception = $!
end
s = p.to_s
end
def test_bad_content
@ -682,7 +657,7 @@ class Tester < Test::Unit::TestCase
assert_equal "content>content", tree_gt.elements[1].text
# This isn't
begin
tree_lt = Document.new in_lt
Document.new in_lt
assert_fail "Should have gotten a parse error"
rescue ParseException
end
@ -856,8 +831,6 @@ EOL
def test_attlist_write
file=File.new(fixture_path("foo.xml"))
doc=Document.new file
root = doc.root
out = ''
doc.write(out)
end
@ -865,7 +838,7 @@ EOL
def test_more_namespaces
assert_raise( REXML::UndefinedNamespaceException,
%Q{Should have gotten an Undefined Namespace error} ) {
doc1 = Document.new("<r><p><n:c/></p></r>")
Document.new("<r><p><n:c/></p></r>")
}
doc2 = Document.new("<r xmlns:n='1'><p><n:c/></p></r>")
es = XPath.match(doc2, '//c')
@ -916,7 +889,7 @@ EOL
end
def test_oses_with_bad_EOLs
d = Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n")
Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n")
end
# Contributed (with patch to fix bug) by Kouhei
@ -1024,7 +997,6 @@ EOL
document.write(s)
end
def test_write_cdata
src = "<a>A</a>"
doc = REXML::Document.new( src )
@ -1045,7 +1017,7 @@ EOL
<x:b x:n="foo"/>
</a>
EOL
d = REXML::Document.new( source )
d = Document.new( source )
assert_equal( 'foo', REXML::XPath.first(d.root, "//x:b/@x:n").value )
assert_equal( nil, REXML::XPath.first(d.root, "//x:b/@x:n", {}))
end
@ -1233,17 +1205,17 @@ EOL
def test_ticket_21
src = "<foo bar=value/>"
assert_raise( ParseException, "invalid XML should be caught" ) {
d = REXML::Document.new(src)
Document.new(src)
}
begin
d = REXML::Document.new(src)
Document.new(src)
rescue
assert_match( /missing attribute quote/, $!.message )
end
end
def test_ticket_63
d = REXML::Document.new(File.new(fixture_path("t63-1.xml")))
Document.new(File.new(fixture_path("t63-1.xml")))
end
def test_ticket_75
@ -1275,9 +1247,9 @@ EOL
def test_ticket_88
doc = REXML::Document.new("<?xml version=\"1.0\" encoding=\"shift_jis\"?>")
assert_equal("<?xml version='1.0' encoding='Shift_JIS'?>", doc.to_s)
assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
doc = REXML::Document.new("<?xml version = \"1.0\" encoding = \"shift_jis\"?>")
assert_equal("<?xml version='1.0' encoding='Shift_JIS'?>", doc.to_s)
assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
end
def test_ticket_85
@ -1295,8 +1267,6 @@ ENDXML
</bar>
</foo>"
zml = "<foo><bar><bob name='jimmy'/></bar></foo>"
# The pretty printer ignores all whitespace, anyway so output1 == output2
f = REXML::Formatters::Pretty.new( 2 )
d = Document.new( xml, :ignore_whitespace_nodes=>:all )
@ -1358,7 +1328,7 @@ ENDXML
# Per .2.5 Node Tests of XPath spec
assert_raise( REXML::UndefinedNamespaceException,
%Q{Should have gotten an Undefined Namespace error} ) {
d = Document.new("<a><n:b/></a>")
Document.new("<a><n:b/></a>")
}
end

Просмотреть файл

@ -18,7 +18,7 @@ class EncodingTester < Test::Unit::TestCase
def test_encoded_in_encoded_out
doc = Document.new( @encoded )
doc.write( out="" )
out.force_encoding('binary') if out.respond_to? :force_encoding
out.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( @encoded, out )
end
@ -26,12 +26,12 @@ class EncodingTester < Test::Unit::TestCase
def test_encoded_in_change_out
doc = Document.new( @encoded )
doc.xml_decl.encoding = "UTF-8"
assert_equal( ::Encoding::UTF_8, doc.encoding )
assert_equal("UTF-8", doc.encoding)
REXML::Formatters::Default.new.write( doc.root, out="" )
out.force_encoding('binary') if out.respond_to? :force_encoding
out.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( @not_encoded, out )
char = XPath.first( doc, "/a/b/text()" ).to_s
char.force_encoding('binary') if char.respond_to? :force_encoding
char.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( "ĉ", char )
end
@ -39,7 +39,7 @@ class EncodingTester < Test::Unit::TestCase
def test_encoded_in_different_out
doc = Document.new( @encoded )
REXML::Formatters::Default.new.write( doc.root, Output.new( out="", "UTF-8" ) )
out.force_encoding('binary') if out.respond_to? :force_encoding
out.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( @not_encoded, out )
end
@ -47,9 +47,9 @@ class EncodingTester < Test::Unit::TestCase
def test_in_change_out
doc = Document.new( @not_encoded )
doc.xml_decl.encoding = "ISO-8859-3"
assert_equal( ::Encoding::ISO_8859_3, doc.encoding )
assert_equal("ISO-8859-3", doc.encoding)
doc.write( out="" )
out.force_encoding('binary') if out.respond_to? :force_encoding
out.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( @encoded, out )
end
@ -57,7 +57,7 @@ class EncodingTester < Test::Unit::TestCase
def test_in_different_out
doc = Document.new( @not_encoded )
doc.write( Output.new( out="", "ISO-8859-3" ) )
out.force_encoding('binary') if out.respond_to? :force_encoding
out.force_encoding(::Encoding::ASCII_8BIT)
assert_equal( @encoded, out )
end
@ -66,10 +66,10 @@ class EncodingTester < Test::Unit::TestCase
def test_in_different_access
doc = Document.new <<-EOL
<?xml version='1.0' encoding='ISO-8859-1'?>
<a a="ÿ">ÿ</a>
<a a="\xFF">\xFF</a>
EOL
expect = "\303\277"
expect.force_encoding('UTF-8') if expect.respond_to? :force_encoding
expect.force_encoding(::Encoding::UTF_8)
assert_equal( expect, doc.elements['a'].attributes['a'] )
assert_equal( expect, doc.elements['a'].text )
end
@ -86,7 +86,7 @@ class EncodingTester < Test::Unit::TestCase
def test_ticket_110
utf16 = REXML::Document.new(File.new(fixture_path("ticket_110_utf16.xml")))
assert_equal( ::Encoding::UTF_16BE, utf16.encoding )
assert_equal(utf16.encoding, "UTF-16")
assert( utf16[0].kind_of?(REXML::XMLDecl))
end
end