* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store @encoding

as a String naming the encoding. This partially reverts r29646. * lib/rexml/document.rb: follow above. * lib/rexml/output.rb: ditto. * lib/rexml/parsers/baseparser.rb: ditto. * lib/rexml/source.rb: ditto. * lib/rexml/xmldecl.rb: ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@31008 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2011-03-02 15:36:48 +00:00 · 2011-03-02 15:36:48 +00:00 · f25ff846f6
--- a/16
+++ b/16
@ -1,3 +1,19 @@
+Thu Mar  3 00:36:29 2011  NARUSE, Yui  <naruse@ruby-lang.org>
+
+	* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store @encoding
+	  as a String naming the encoding.
+	  this partially reverts r29646.
+
+	* lib/rexml/document.rb: follow above.
+
+	* lib/rexml/output.rb: ditto.
+
+	* lib/rexml/parsers/baseparser.rb: ditto.
+
+	* lib/rexml/source.rb: ditto.
+
+	* lib/rexml/xmldecl.rb: ditto.
+
 Wed Mar  2 23:19:56 2011  Nobuyoshi Nakada  <nobu@ruby-lang.org>

 	* string.c (str_byte_substr): return nil for negative length.
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@ -184,7 +184,7 @@ module REXML
    #   that IE's limited abilities can handle.  This hack inserts a space
    #   before the /> on empty tags.  Defaults to false
    def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
-      if xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output)
+      if xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
        output = Output.new( output, xml_decl.encoding )
      end
      formatter = if indent > -1
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@ -1,8 +1,9 @@
 module REXML
  module Encoding
-    # ID ---> Encoding object
+    # ID ---> Encoding name
    attr_reader :encoding
    def encoding=(encoding)
+      encoding = encoding.name if encoding.is_a?(Encoding)
      if encoding.is_a?(String)
        original_encoding = encoding
        encoding = find_encoding(encoding)
@ -11,35 +12,25 @@ module REXML
        end
      end
      return false if defined?(@encoding) and encoding == @encoding
-      if encoding and encoding != ::Encoding::UTF_8
-        @encoding = encoding
+      if encoding
+        @encoding = encoding.upcase
      else
-        @encoding = ::Encoding::UTF_8
+        @encoding = 'UTF-8'
      end
      true
    end

    def check_encoding(xml)
-      # We have to recognize UTF-16, LSB UTF-16, and UTF-8
+      # We have to recognize UTF-16BE, UTF-16LE, and UTF-8
      if xml[0, 2] == "\xfe\xff"
        xml[0, 2] = ""
-        ::Encoding::UTF_16BE
+        return 'UTF-16BE'
      elsif xml[0, 2] == "\xff\xfe"
        xml[0, 2] = ""
-        ::Encoding::UTF_16LE
-      else
-        if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1
-            \s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml
-          encoding_name = $3
-          if /\Autf-16\z/i =~ encoding_name
-            ::Encoding::UTF_16BE
-          else
-            find_encoding(encoding_name)
-          end
-        else
-          ::Encoding::UTF_8
-        end
+        return 'UTF-16LE'
      end
+      xml =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
+      return $3 ? $3.upcase : 'UTF-8'
    end

    def encode(string)
@ -53,14 +44,19 @@ module REXML
    private
    def find_encoding(name)
      case name
-      when "UTF-16"
-        name = "UTF-16BE"
      when /\Ashift-jis\z/i
-        name = "Shift_JIS"
+        return "SHIFT_JIS"
      when /\ACP-(\d+)\z/
        name = "CP#{$1}"
+      when /\AUTF-8\z/i
+        return name
      end
-      ::Encoding.find(name)
+      begin
+        ::Encoding::Converter.search_convpath(name, 'UTF-8')
+      rescue ::Encoding::ConverterNotFoundError
+        return nil
+      end
+      name
    end
  end
 end
--- a/lib/rexml/formatters/default.rb
+++ b/lib/rexml/formatters/default.rb
@ -22,7 +22,7 @@ module REXML
        case node

        when Document
-          if node.xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output)
+          if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
            output = Output.new( output, node.xml_decl.encoding )
          end
          write_document( node, output )
--- a/lib/rexml/output.rb
+++ b/lib/rexml/output.rb
@ -10,7 +10,7 @@ module REXML
      @output = real_IO
      self.encoding = encd

-      @to_utf = (@encoding != ::Encoding::UTF_8)
+      @to_utf = encd != 'UTF-8'
    end

    def <<( content )
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@ -248,7 +248,7 @@ module REXML
            @document_status = :after_doctype
            @source.read if @source.buffer.size<2
            md = @source.match(/\s*/um, true)
-            if @source.encoding == ::Encoding::UTF_8
+            if @source.encoding == "UTF-8"
              @source.buffer.force_encoding(::Encoding::UTF_8)
            end
          end
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@ -54,16 +54,14 @@ module REXML
    def encoding=(enc)
      return unless super
      @line_break = encode( '>' )
-      if @encoding != ::Encoding::UTF_8
+      if @encoding != 'UTF-8'
        @buffer = decode(@buffer)
        @to_utf = true
      else
        @to_utf = false
-        if @buffer.respond_to? :force_encoding
        @buffer.force_encoding ::Encoding::UTF_8
      end
    end
-    end

    # Scans the source for a given pattern.  Note, that this is not your
    # usual scan() method.  For one thing, the pattern argument has some
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@ -109,20 +109,9 @@ module REXML
    end

    private
-    def normalized_encoding_name(_encoding)
-      if _encoding == ::Encoding::UTF_16BE
-        "UTF-16"
-      else
-        return _encoding.name
-      end
-    end
-
    def content(enc)
      rv = "version='#@version'"
-      if @writeencoding || enc.to_s !~ /\Autf-8\z/i
-        encoding_name = normalized_encoding_name(enc)
-        rv << " encoding='#{encoding_name}'"
-      end
+      rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
      rv << " standalone='#@standalone'" if @standalone
      rv
    end
--- a/test/rexml/test_contrib.rb
+++ b/test/rexml/test_contrib.rb
@ -241,7 +241,7 @@ DELIMITER
    end

    doc = REXML::Document.new(source_iso)
-    assert_equal('ISO-8859-1', doc.xml_decl.encoding.to_s)
+    assert_equal('ISO-8859-1', doc.xml_decl.encoding)
    assert_equal(koln_utf, doc.root.text)
    doc.write(out="")
    assert_equal(source_iso, out )
@ -255,23 +255,21 @@ DELIMITER
 <position><aktuell datum="01-10-11">Technik</aktuell></position>
 <hauptspalte>
 <headline>Technik</headline>
-Die Technik ist das Rückgrat der meisten Geschäftsprozesse bei Home of the Brave. Deshalb sollen hier alle relevanten technischen Abläufe, Daten und Einrichtungen beschrieben werden, damit jeder im Bedarfsfall die nötigen Informationen, Anweisungen und Verhaltensempfehlungen nachlesen und/oder abrufen kann.
+Die Technik ist das R\xFCckgrat der meisten Gesch\xFCftsprozesse bei Home of the Brave. Deshalb sollen hier alle relevanten technischen Abl\xFCufe, Daten und Einrichtungen beschrieben werden, damit jeder im Bedarfsfall die n\xFCtigen Informationen, Anweisungen und Verhaltensempfehlungen nachlesen und/oder abrufen kann.
 </hauptspalte>
 <nebenspalte>
  <link ziel="Flash/">Flash</link><umbruch/>
-  Nützliches von Flashern für Flasher.<umbruch/>
+  N\xFCtzliches von Flashern f\xFCr Flasher.<umbruch/>
  <link neu="ja" ziel="Cvs/">CVS-FAQ</link><umbruch/>
  FAQ zur Benutzung von CVS bei HOB
 </nebenspalte>
 </intranet>
 EOF
    tn = XPath.first(doc, "//nebenspalte/text()[2]")
-    expected_iso = "Nützliches von Flashern für Flasher."
+    expected_iso = "N\xFCtzliches von Flashern f\xFCr Flasher."
    expected_utf = expected_iso.unpack('C*').pack('U*')
-                if expected_utf.respond_to? :encode
-      expected_iso.force_encoding("iso-8859-1")
+    expected_iso.force_encoding(::Encoding::ISO_8859_1)
    expected_utf.force_encoding(::Encoding::UTF_8)
-                end
    assert_equal(expected_utf, tn.to_s.strip)
    f = REXML::Formatters::Default.new
    f.write( tn, Output.new(o = "", "ISO-8859-1") )
--- a/test/rexml/test_core.rb
+++ b/test/rexml/test_core.rb
@ -230,34 +230,12 @@ class Tester < Test::Unit::TestCase
    doc = Document.new(docin)
    doc.write(test="")
    assert_equal(31, doc.doctype.size)
-
-    # Here's a little ditty from Tobias...
-    src = <<-EOL
-    <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
-    "http://www.w3.org/TR/SVG/DTD/svg10.dtd"
-    [
-    <!-- <!ENTITY % fast-slow "0 0  .5 1">-->
-    <!--<!ENTITY % slow-fast ".5 0  1 1">-->
-    <!ENTITY hover_ani
-    '<animateTransform attributeName="transform"
-    type="scale" restart="whenNotActive" values="1;0.96"
-    dur="0.5s" calcMode="spline" keySplines="0 0  .5 1"
-    fill="freeze" begin="mouseover"/>
-    <animateTransform  attributeName="transform"
-    type="scale" restart="whenNotActive" values="0.96;1"
-    dur="0.5s" calcMode="spline" keySplines=".5 0  1 1"
-    fill="freeze" begin="mouseover+0.5s"/>'
-    >
-    ]
-    > <a/>
-    EOL
  end

  def test_document
    # Testing cloning
    source = "<element/>"
    doc = Document.new source
-    doc2 = Document.new doc

    # Testing Root
    assert_equal doc.root.name.to_s, "element"
@ -642,11 +620,10 @@ class Tester < Test::Unit::TestCase
  end

  def test_line
-    doc = Document.new File.new(fixture_path("bad.xml"))
+    Document.new File.new(fixture_path("bad.xml"))
    assert_fail "There should have been an error"
  rescue Exception
    # We should get here
-    er = $!
    assert($!.line == 5, "Should have been an error on line 5, "+
      "but was reported as being on line #{$!.line}" )
  end
@ -664,13 +641,11 @@ class Tester < Test::Unit::TestCase
  def test_exception
    source = SourceFactory.create_from "<a/>"
    p = ParseException.new( "dummy message", source )
-    s = p.to_s
    begin
      raise "dummy"
    rescue Exception
      p.continued_exception = $!
    end
-    s = p.to_s
  end

  def test_bad_content
@ -682,7 +657,7 @@ class Tester < Test::Unit::TestCase
    assert_equal "content>content", tree_gt.elements[1].text
    # This isn't
    begin
-      tree_lt = Document.new in_lt
+      Document.new in_lt
      assert_fail "Should have gotten a parse error"
    rescue ParseException
    end
@ -856,8 +831,6 @@ EOL
  def test_attlist_write
    file=File.new(fixture_path("foo.xml"))
    doc=Document.new file
-    root = doc.root 
-
    out = ''
    doc.write(out)
  end
@ -865,7 +838,7 @@ EOL
  def test_more_namespaces
    assert_raise( REXML::UndefinedNamespaceException,
                   %Q{Should have gotten an Undefined Namespace error} )  {
-      doc1 = Document.new("<r><p><n:c/></p></r>")
+      Document.new("<r><p><n:c/></p></r>")
    }
    doc2 = Document.new("<r xmlns:n='1'><p><n:c/></p></r>")
    es = XPath.match(doc2, '//c')
@ -916,7 +889,7 @@ EOL
  end

  def test_oses_with_bad_EOLs
-    d = Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n")
+    Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n")
  end

  # Contributed (with patch to fix bug) by Kouhei
@ -1024,7 +997,6 @@ EOL
    document.write(s)
  end

-  
  def test_write_cdata
    src = "<a>A</a>"
    doc = REXML::Document.new( src )
@ -1045,7 +1017,7 @@ EOL
      <x:b x:n="foo"/>
    </a>
    EOL
-    d = REXML::Document.new( source )
+    d = Document.new( source )
    assert_equal( 'foo', REXML::XPath.first(d.root, "//x:b/@x:n").value )
    assert_equal( nil, REXML::XPath.first(d.root, "//x:b/@x:n", {}))
  end
@ -1233,17 +1205,17 @@ EOL
  def test_ticket_21
    src = "<foo bar=value/>"
    assert_raise( ParseException, "invalid XML should be caught" ) {
-      d = REXML::Document.new(src)
+      Document.new(src)
    }
    begin
-      d = REXML::Document.new(src)
+      Document.new(src)
    rescue
      assert_match( /missing attribute quote/, $!.message )
    end
  end

  def test_ticket_63
-    d = REXML::Document.new(File.new(fixture_path("t63-1.xml")))
+    Document.new(File.new(fixture_path("t63-1.xml")))
  end

  def test_ticket_75
@ -1275,9 +1247,9 @@ EOL

  def test_ticket_88
    doc = REXML::Document.new("<?xml version=\"1.0\" encoding=\"shift_jis\"?>")
-    assert_equal("<?xml version='1.0' encoding='Shift_JIS'?>", doc.to_s)
+    assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
    doc = REXML::Document.new("<?xml version = \"1.0\" encoding = \"shift_jis\"?>")
-    assert_equal("<?xml version='1.0' encoding='Shift_JIS'?>", doc.to_s)
+    assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
  end

  def test_ticket_85
@ -1295,8 +1267,6 @@ ENDXML
  </bar>
 </foo>"

-    zml = "<foo><bar><bob name='jimmy'/></bar></foo>"
-    
    # The pretty printer ignores all whitespace, anyway so output1 == output2
    f = REXML::Formatters::Pretty.new( 2 )
    d = Document.new( xml, :ignore_whitespace_nodes=>:all )
@ -1358,7 +1328,7 @@ ENDXML
    # Per .2.5 Node Tests of XPath spec
    assert_raise( REXML::UndefinedNamespaceException,
                   %Q{Should have gotten an Undefined Namespace error} )  {
-      d = Document.new("<a><n:b/></a>") 
+      Document.new("<a><n:b/></a>")
    }
  end

--- a/test/rexml/test_encoding.rb
+++ b/test/rexml/test_encoding.rb
@ -18,7 +18,7 @@ class EncodingTester < Test::Unit::TestCase
  def test_encoded_in_encoded_out
    doc = Document.new( @encoded )
    doc.write( out="" )
-    out.force_encoding('binary') if out.respond_to? :force_encoding
+    out.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( @encoded, out )
  end

@ -26,12 +26,12 @@ class EncodingTester < Test::Unit::TestCase
  def test_encoded_in_change_out
    doc = Document.new( @encoded )
    doc.xml_decl.encoding = "UTF-8"
-    assert_equal( ::Encoding::UTF_8, doc.encoding )
+    assert_equal("UTF-8", doc.encoding)
    REXML::Formatters::Default.new.write( doc.root, out="" )
-    out.force_encoding('binary') if out.respond_to? :force_encoding
+    out.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( @not_encoded, out )
    char = XPath.first( doc, "/a/b/text()" ).to_s
-    char.force_encoding('binary') if char.respond_to? :force_encoding
+    char.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( "ĉ", char )
  end

@ -39,7 +39,7 @@ class EncodingTester < Test::Unit::TestCase
  def test_encoded_in_different_out
    doc = Document.new( @encoded )
    REXML::Formatters::Default.new.write( doc.root, Output.new( out="", "UTF-8" ) )
-    out.force_encoding('binary') if out.respond_to? :force_encoding
+    out.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( @not_encoded, out )
  end

@ -47,9 +47,9 @@ class EncodingTester < Test::Unit::TestCase
  def test_in_change_out
    doc = Document.new( @not_encoded )
    doc.xml_decl.encoding = "ISO-8859-3"
-    assert_equal( ::Encoding::ISO_8859_3, doc.encoding )
+    assert_equal("ISO-8859-3", doc.encoding)
    doc.write( out="" )
-    out.force_encoding('binary') if out.respond_to? :force_encoding
+    out.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( @encoded, out )
  end

@ -57,7 +57,7 @@ class EncodingTester < Test::Unit::TestCase
  def test_in_different_out
    doc = Document.new( @not_encoded )
    doc.write( Output.new( out="", "ISO-8859-3" ) )
-    out.force_encoding('binary') if out.respond_to? :force_encoding
+    out.force_encoding(::Encoding::ASCII_8BIT)
    assert_equal( @encoded, out )
  end

@ -66,10 +66,10 @@ class EncodingTester < Test::Unit::TestCase
  def test_in_different_access
    doc = Document.new <<-EOL
    <?xml version='1.0' encoding='ISO-8859-1'?>
-    <a a="ÿ">ÿ</a>
+    <a a="\xFF">\xFF</a>
    EOL
    expect = "\303\277"
-    expect.force_encoding('UTF-8') if expect.respond_to? :force_encoding
+    expect.force_encoding(::Encoding::UTF_8)
    assert_equal( expect, doc.elements['a'].attributes['a'] )
    assert_equal( expect, doc.elements['a'].text )
  end
@ -86,7 +86,7 @@ class EncodingTester < Test::Unit::TestCase

  def test_ticket_110
    utf16 = REXML::Document.new(File.new(fixture_path("ticket_110_utf16.xml")))
-    assert_equal( ::Encoding::UTF_16BE, utf16.encoding )
+    assert_equal(utf16.encoding, "UTF-16")
    assert( utf16[0].kind_of?(REXML::XMLDecl))
  end
 end