2004-01-28 06:46:13 +03:00
|
|
|
require 'xmlscan/scanner'
|
2005-11-23 16:35:11 +03:00
|
|
|
require 'stringio'
|
2004-01-28 06:46:13 +03:00
|
|
|
|
|
|
|
module RSS
|
2004-10-16 08:51:15 +04:00
|
|
|
|
|
|
|
class XMLScanParser < BaseParser
  # Listener class paired with this parser.
  def self.listener
    XMLScanListener
  end

  private

  # Runs an XMLScan scanner over @rss, reporting events to @listener.
  #
  # A String source is wrapped in a StringIO; any other source is handed to
  # the scanner unchanged. An XMLScan::Error is re-raised as
  # NotWellFormedError, carrying the first available line number (taken from
  # the error, then the scanner, then the input) and the original message.
  def _parse
    source = @rss.is_a?(String) ? StringIO.new(@rss) : @rss
    scanner = XMLScan::XMLScanner.new(@listener)
    scanner.parse(source)
  rescue XMLScan::Error => error
    line = error.lineno || scanner.lineno || source.lineno
    raise NotWellFormedError.new(line) { error.message }
  end
end
|
|
|
|
|
|
|
|
# Listener whose on_* callbacks translate XMLScan visitor events into the
# xmldecl / instruction / text / tag_start / tag_end calls that are defined
# outside this class (presumably by BaseListener / ListenerMixin -- confirm).
class XMLScanListener < BaseListener
  include XMLScan::Visitor
  include ListenerMixin

  # Named entities that entity() can resolve, keyed by reference name.
  ENTITIES = {
    'lt'   => '<',
    'gt'   => '>',
    'amp'  => '&',
    'quot' => '"',
    'apos' => "'"
  }

  # --- XML declaration -----------------------------------------------------

  # Remembers the declared version until the declaration ends.
  def on_xmldecl_version(str)
    @version = str
  end

  # Remembers the declared encoding until the declaration ends.
  def on_xmldecl_encoding(str)
    @encoding = str
  end

  # Remembers the raw standalone value until the declaration ends.
  def on_xmldecl_standalone(str)
    @standalone = str
  end

  # Reports the collected declaration; standalone is passed as a boolean
  # that is true only when the raw value was exactly "yes".
  def on_xmldecl_end
    standalone = (@standalone == "yes")
    xmldecl(@version, @encoding, standalone)
  end

  # --- Content -------------------------------------------------------------

  # Processing instructions, character data and CDATA sections are handled
  # directly by the existing instruction / text methods.
  alias_method :on_pi, :instruction
  alias_method :on_chardata, :text
  alias_method :on_cdata, :text

  # End tag: forwarded as tag_end.
  def on_etag(name)
    tag_end(name)
  end

  # Entity reference in content: emitted as text after resolution.
  def on_entityref(ref)
    resolved = entity(ref)
    text(resolved)
  end

  # Character reference in content: the code point is packed with the 'U'
  # directive and emitted as text.
  def on_charref(code)
    char = [code].pack('U')
    text(char)
  end

  alias_method :on_charref_hex, :on_charref

  # --- Start tags and attributes -------------------------------------------

  # Start tag opened: begin with an empty attribute table.
  def on_stag(name)
    @attrs = {}
  end

  # New attribute: its value starts empty and is appended to by the
  # on_attr_* callbacks; the same String object is stored in @attrs so
  # later appends are visible there.
  def on_attribute(name)
    @current_attr = ''
    @attrs[name] = @current_attr
  end

  # Literal piece of the current attribute value.
  def on_attr_value(str)
    @current_attr << str
  end

  # Entity reference inside the current attribute value.
  def on_attr_entityref(ref)
    resolved = entity(ref)
    @current_attr << resolved
  end

  # Character reference inside the current attribute value.
  def on_attr_charref(code)
    char = [code].pack('U')
    @current_attr << char
  end

  alias_method :on_attr_charref_hex, :on_attr_charref

  # Start tag closed: report it together with the collected attributes.
  def on_stag_end(name)
    tag_start(name, @attrs)
  end

  # Empty-element tag: report a start immediately followed by an end.
  def on_stag_end_empty(name)
    tag_start(name, @attrs)
    tag_end(name)
  end

  private

  # Looks up ref in ENTITIES and returns its replacement text; when there is
  # no entry, the result of wellformed_error (called with an
  # "undefined entity" message) is returned instead.
  def entity(ref)
    ENTITIES[ref] || wellformed_error("undefined entity: #{ref}")
  end
end
|
2004-01-28 06:46:13 +03:00
|
|
|
|
|
|
|
end
|