зеркало из https://github.com/github/ruby.git
600 строки
16 KiB
Plaintext
600 строки
16 KiB
Plaintext
# Copyright (c) 2006 Pluron Inc.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the
|
|
# "Software"), to deal in the Software without restriction, including
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
# the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be
|
|
# included in all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
# The parser for the MediaWiki language.
|
|
#
|
|
# Usage together with a lexer:
|
|
# inputFile = File.new("data/input1", "r")
|
|
# input = inputFile.read
|
|
# parser = MediaWikiParser.new
|
|
# parser.lexer = MediaWikiLexer.new
|
|
# parser.parse(input)
|
|
|
|
class MediaWikiParser
|
|
|
|
# Terminal symbols the lexer may hand to the parser, grouped by the
# construct they delimit. Declaration order is unchanged.
token TEXT BOLD_START BOLD_END ITALIC_START ITALIC_END
    LINK_START LINK_END LINKSEP
    INTLINK_START INTLINK_END INTLINKSEP RESOURCESEP CHAR_ENT
    PRE_START PRE_END PREINDENT_START PREINDENT_END
    SECTION_START SECTION_END HLINE
    SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
    PARA_START PARA_END
    UL_START UL_END OL_START OL_END LI_START LI_END
    DL_START DL_END DT_START DT_END DD_START DD_END
    TAG_START TAG_END ATTR_NAME ATTR_VALUE
    TABLE_START TABLE_END ROW_START ROW_END
    HEAD_START HEAD_END CELL_START CELL_END
    KEYWORD TEMPLATE_START TEMPLATE_END CATEGORY PASTE_START PASTE_END
|
|
|
|
|
|
rule
|
|
|
|
# Start symbol: the whole document is a sequence of content nodes.
wiki:
    repeated_contents
        {
            # The root node starts at offset 0 and spans the running total
            # of token lengths collected by next_token.
            root = WikiAST.new(0, @wiki_ast_length)
            @nodes.push root
            root.children += val[0]
        }
    ;
|
|
|
|
# A single content node. Alternatives without an action pass the value of
# their only symbol through unchanged (the default Racc action is val[0]).
# Every AST node is created with the span recorded by next_token.
contents:
      text
    | bulleted_list
    | numbered_list
    | dictionary_list
        {
            # dictionary_list yields a plain array of nodes, so it is
            # wrapped in a ListAST here.
            dict = ListAST.new(@ast_index, @ast_length)
            dict.list_type = :Dictionary
            dict.children = val[0]
            result = dict
        }
    | preformatted
    | section
    | tag
    | template
    | KEYWORD
        {
            keyword = KeywordAST.new(@ast_index, @ast_length)
            keyword.text = val[0]
            result = keyword
        }
    | PARA_START para_contents PARA_END
        {
            para = ParagraphAST.new(@ast_index, @ast_length)
            para.children = val[1]
            result = para
        }
    | LINK_START link_contents LINK_END
        {
            # link_contents is an array: the url first, then caption nodes.
            target, *caption = val[1]
            link = LinkAST.new(@ast_index, @ast_length)
            link.link_type = val[0]
            link.url = target
            link.children += caption unless caption.empty?
            result = link
        }
    | PASTE_START para_contents PASTE_END
        {
            paste = PasteAST.new(@ast_index, @ast_length)
            paste.children = val[1]
            result = paste
        }
    | INTLINK_START TEXT RESOURCESEP TEXT reslink_repeated_contents INTLINK_END
        {
            items = val[4]
            res_link = ResourceLinkAST.new(@ast_index, @ast_length)
            res_link.prefix = val[1]
            res_link.locator = val[3]
            res_link.children = items unless items.nil? || items.empty?
            result = res_link
        }
    | INTLINK_START TEXT intlink_repeated_contents INTLINK_END
        {
            caption = val[2]
            int_link = InternalLinkAST.new(@ast_index, @ast_length)
            int_link.locator = val[1]
            int_link.children = caption unless caption.nil? || caption.empty?
            result = int_link
        }
    | INTLINK_START CATEGORY TEXT cat_sort_contents INTLINK_END
        {
            category = CategoryAST.new(@ast_index, @ast_length)
            category.locator = val[2]
            category.sort_as = val[3]
            result = category
        }
    | INTLINK_START RESOURCESEP CATEGORY TEXT intlink_repeated_contents INTLINK_END
        {
            caption = val[4]
            cat_link = CategoryLinkAST.new(@ast_index, @ast_length)
            cat_link.locator = val[3]
            cat_link.children = caption unless caption.nil? || caption.empty?
            result = cat_link
        }
    | table
    ;
|
|
|
|
# Optional body of a paragraph or paste block: nil when empty, otherwise
# the array produced by repeated_contents.
para_contents:
        { result = nil }
    | repeated_contents
    ;
|
|
|
|
# An XHTML element, with or without child contents. The start and end tag
# token values must be equal, otherwise a Racc::ParseError is raised.
tag:
      TAG_START tag_attributes TAG_END
        {
            unless val[0] == val[2]
                raise Racc::ParseError, "XHTML end tag #{val[2]} does not match start tag #{val[0]}"
            end
            element = ElementAST.new(@ast_index, @ast_length)
            element.name = val[0]
            element.attributes = val[1]
            result = element
        }
    | TAG_START tag_attributes repeated_contents TAG_END
        {
            unless val[0] == val[3]
                raise Racc::ParseError, "XHTML end tag #{val[3]} does not match start tag #{val[0]}"
            end
            element = ElementAST.new(@ast_index, @ast_length)
            element.name = val[0]
            element.attributes = val[1]
            element.children += val[2]
            result = element
        }
    ;
|
|
|
|
# Attributes of an XHTML tag, collected into a Hash of name => value.
# Yields nil when the tag has no attributes. An attribute given without a
# value is stored as true. The rule is right-recursive, so the attributes
# to the right are reduced first and an earlier duplicate name overwrites
# a later one.
tag_attributes:
        { result = nil }
    | ATTR_NAME tag_attributes
        {
            # This alternative has two symbols only, so the remaining
            # attributes are val[1]. Reading val[2] here always gave nil and
            # silently discarded every attribute after a valueless one.
            attr_map = val[1] || {}
            attr_map[val[0]] = true
            result = attr_map
        }
    | ATTR_NAME ATTR_VALUE tag_attributes
        {
            attr_map = val[2] || {}
            attr_map[val[0]] = val[1]
            result = attr_map
        }
    ;
|
|
|
|
|
|
# Body of an external link: an array whose first element is the url text,
# followed by any caption nodes given after the separator.
link_contents:
      TEXT
        { result = [val[0]] }
    | TEXT LINKSEP link_repeated_contents
        { result = [val[0]] + val[2] }
    ;
|
|
|
|
|
|
# Caption of an external link: one or more content runs separated by
# LINKSEP, flattened into a single array.
link_repeated_contents:
      repeated_contents
    | repeated_contents LINKSEP link_repeated_contents
        { result = val[2] ? val[0] + val[2] : val[0] }
    ;
|
|
|
|
|
|
# Optional caption of an internal link: nil when absent, otherwise the
# content array that follows the separator.
intlink_repeated_contents:
        { result = nil }
    | INTLINKSEP repeated_contents
        { result = val.last }
    ;
|
|
|
|
# Optional sort key of a category: nil when absent, otherwise the TEXT
# that follows the separator.
cat_sort_contents:
        { result = nil }
    | INTLINKSEP TEXT
        { result = val.last }
    ;
|
|
|
|
# Separator-delimited items of a resource link. Yields nil when there are
# none, otherwise an array of InternalLinkItemAST nodes. A separator with
# nothing after it contributes no item.
reslink_repeated_contents:
        { result = nil }
    | INTLINKSEP reslink_repeated_contents
        { result = val.last }
    | INTLINKSEP repeated_contents reslink_repeated_contents
        {
            item = InternalLinkItemAST.new(@ast_index, @ast_length)
            item.children = val[1]
            result = val[2] ? [item] + val[2] : [item]
        }
    ;
|
|
|
|
# One or more content nodes, collected left to right into a new array.
repeated_contents: contents
        { result = [val[0]] }
    | repeated_contents contents
        { result = val[0] + [val[1]] }
    ;
|
|
|
|
# Plain or formatted text. An element arrives as a pair of formatting
# symbol and token value and is wrapped in a TextAST. A formatted_element
# is already a node and is passed through.
text: element
        {
            formatting, contents = val[0]
            node = TextAST.new(@ast_index, @ast_length)
            node.formatting = formatting
            node.contents = contents
            result = node
        }
    | formatted_element
    ;
|
|
|
|
# A table, optionally carrying an options TEXT right after the opening
# token. Children are assigned only when at least one row was parsed.
table:
      TABLE_START table_contents TABLE_END
        {
            rows = val[1]
            node = TableAST.new(@ast_index, @ast_length)
            node.children = rows unless rows.nil? || rows.empty?
            result = node
        }
    | TABLE_START TEXT table_contents TABLE_END
        {
            rows = val[2]
            node = TableAST.new(@ast_index, @ast_length)
            node.options = val[1]
            node.children = rows unless rows.nil? || rows.empty?
            result = node
        }
    ;
|
|
|
|
# Zero or more table rows. Yields nil when there are none, otherwise an
# array of TableRowAST nodes in source order. A row may carry an options
# TEXT right after its opening token.
table_contents:
        { result = nil }
    | ROW_START row_contents ROW_END table_contents
        {
            cells, following = val[1], val[3]
            row = TableRowAST.new(@ast_index, @ast_length)
            row.children = cells unless cells.nil? || cells.empty?
            result = [row]
            result += following unless following.nil? || following.empty?
        }
    | ROW_START TEXT row_contents ROW_END table_contents
        {
            cells, following = val[2], val[4]
            row = TableRowAST.new(@ast_index, @ast_length)
            row.children = cells unless cells.nil? || cells.empty?
            row.options = val[1]
            result = [row]
            result += following unless following.nil? || following.empty?
        }
    ;
|
|
|
|
# Zero or more cells of a table row. Yields nil when there are none,
# otherwise an array of TableCellAST nodes typed :head or :body.
row_contents:
        { result = nil }
    | HEAD_START HEAD_END row_contents
        {
            following = val[2]
            cell = TableCellAST.new(@ast_index, @ast_length)
            cell.type = :head
            result = [cell]
            result += following unless following.nil? || following.empty?
        }
    | HEAD_START repeated_contents HEAD_END row_contents
        {
            body, following = val[1], val[3]
            cell = TableCellAST.new(@ast_index, @ast_length)
            cell.children = body unless body.nil? || body.empty?
            cell.type = :head
            result = [cell]
            result += following unless following.nil? || following.empty?
        }
    | CELL_START CELL_END row_contents
        {
            following = val[2]
            cell = TableCellAST.new(@ast_index, @ast_length)
            cell.type = :body
            result = [cell]
            result += following unless following.nil? || following.empty?
        }
    | CELL_START repeated_contents CELL_END row_contents
        {
            body, following = val[1], val[3]
            # When the closing token value is the string attributes, this
            # span is not a cell of its own: its contents become the
            # attributes of the cell that follows it.
            attributes_only = (val[2] == 'attributes')
            if attributes_only
                result = []
            else
                cell = TableCellAST.new(@ast_index, @ast_length)
                cell.children = body unless body.nil? || body.empty?
                cell.type = :body
                result = [cell]
            end
            result += following unless following.nil? || following.empty?
            if attributes_only && following && following.first.class == TableCellAST
                following.first.attributes = body
            end
        }
    ;
|
|
|
|
|
|
# A leaf token turned into a pair of formatting symbol and token value.
# The text rule wraps the pair in a TextAST.
element:
      TEXT
        { result = [:None, val[0]] }
    | HLINE
        { result = [:HLine, val[0]] }
    | CHAR_ENT
        { result = [:CharacterEntity, val[0]] }
    | SIGNATURE_DATE
        { result = [:SignatureDate, val[0]] }
    | SIGNATURE_NAME
        { result = [:SignatureName, val[0]] }
    | SIGNATURE_FULL
        { result = [:SignatureFull, val[0]] }
    ;
|
|
|
|
# Bold or italic span, empty or with child contents, as a FormattedAST
# whose formatting is :Bold or :Italic.
formatted_element:
      BOLD_START BOLD_END
        {
            node = FormattedAST.new(@ast_index, @ast_length)
            node.formatting = :Bold
            result = node
        }
    | ITALIC_START ITALIC_END
        {
            node = FormattedAST.new(@ast_index, @ast_length)
            node.formatting = :Italic
            result = node
        }
    | BOLD_START repeated_contents BOLD_END
        {
            node = FormattedAST.new(@ast_index, @ast_length)
            node.formatting = :Bold
            node.children += val[1]
            result = node
        }
    | ITALIC_START repeated_contents ITALIC_END
        {
            node = FormattedAST.new(@ast_index, @ast_length)
            node.formatting = :Italic
            node.children += val[1]
            result = node
        }
    ;
|
|
|
|
# Unordered list: at least one item, followed by any further items.
bulleted_list: UL_START list_item list_contents UL_END
        {
            first_item, more_items = val[1], val[2]
            bullets = ListAST.new(@ast_index, @ast_length)
            bullets.list_type = :Bulleted
            bullets.children << first_item
            bullets.children += more_items
            result = bullets
        }
    ;
|
|
|
|
# Ordered list: at least one item, followed by any further items.
numbered_list: OL_START list_item list_contents OL_END
        {
            first_item, more_items = val[1], val[2]
            numbers = ListAST.new(@ast_index, @ast_length)
            numbers.list_type = :Numbered
            numbers.children << first_item
            numbers.children += more_items
            result = numbers
        }
    ;
|
|
|
|
# Zero or more further list items, as an array in source order. The empty
# alternative yields an empty array, never nil.
list_contents:
      list_item list_contents
        { result = [val[0]] + val[1] }
    |
        { result = [] }
    ;
|
|
|
|
# One list item, empty or with child contents.
list_item:
      LI_START LI_END
        { result = ListItemAST.new(@ast_index, @ast_length) }
    | LI_START repeated_contents LI_END
        {
            item = ListItemAST.new(@ast_index, @ast_length)
            item.children += val[1]
            result = item
        }
    ;
|
|
|
|
# Definition list body as a plain array: an optional leading term followed
# by the definitions. The contents rule wraps the array in a ListAST.
dictionary_list:
      DL_START dictionary_term dictionary_contents DL_END
        { result = [val[1]] + val[2] }
    | DL_START dictionary_contents DL_END
        { result = val[1] }
    ;
|
|
|
|
# The term of a definition list, empty or with child contents.
dictionary_term:
      DT_START DT_END
        { result = ListTermAST.new(@ast_index, @ast_length) }
    | DT_START repeated_contents DT_END
        {
            term = ListTermAST.new(@ast_index, @ast_length)
            term.children += val[1]
            result = term
        }
    ;
|
|
|
|
# Zero or more definitions, as an array in source order. The empty
# alternative yields an empty array.
dictionary_contents:
      dictionary_definition dictionary_contents
        { result = val[1] ? [val[0]] + val[1] : [val[0]] }
    |
        { result = [] }
    ;
|
|
|
|
# One definition of a definition list, empty or with child contents.
dictionary_definition:
      DD_START DD_END
        { result = ListDefinitionAST.new(@ast_index, @ast_length) }
    | DD_START repeated_contents DD_END
        {
            definition = ListDefinitionAST.new(@ast_index, @ast_length)
            definition.children += val[1]
            result = definition
        }
    ;
|
|
|
|
# Preformatted block. The PREINDENT form additionally marks the node as
# indented.
preformatted: PRE_START repeated_contents PRE_END
        {
            block = PreformattedAST.new(@ast_index, @ast_length)
            block.children += val[1]
            result = block
        }
    | PREINDENT_START repeated_contents PREINDENT_END
        {
            block = PreformattedAST.new(@ast_index, @ast_length)
            block.indented = true
            block.children += val[1]
            result = block
        }
    ;
|
|
|
|
# Section heading. The level is the length of the SECTION_START token
# value and the heading contents become the children of the node.
section: SECTION_START repeated_contents SECTION_END
        {
            # A leftover assignment of an array to result was removed here:
            # it was overwritten below before anything could read it.
            heading = SectionAST.new(@ast_index, @ast_length)
            heading.children = val[1]
            heading.level = val[0].length
            result = heading
        }
    ;
|
|
|
|
# Template invocation: a name followed by optional parameters. Children
# are assigned only when at least one parameter was parsed.
template: TEMPLATE_START TEXT template_parameters TEMPLATE_END
        {
            params = val[2]
            node = TemplateAST.new(@ast_index, @ast_length)
            node.template_name = val[1]
            node.children = params unless params.nil? || params.empty?
            result = node
        }
    ;
|
|
|
|
# Zero or more template parameters, each introduced by a separator. Yields
# nil when there are none, otherwise an array of TemplateParameterAST
# nodes. A parameter is either literal TEXT, stored as parameter_value, or
# a nested template, stored as a child.
template_parameters:
        { result = nil }
    | INTLINKSEP TEXT template_parameters
        {
            param = TemplateParameterAST.new(@ast_index, @ast_length)
            param.parameter_value = val[1]
            result = val[2] ? [param] + val[2] : [param]
        }
    | INTLINKSEP template template_parameters
        {
            param = TemplateParameterAST.new(@ast_index, @ast_length)
            param.children << val[1]
            result = val[2] ? [param] + val[2] : [param]
        }
    ;
|
|
|
|
end
|
|
|
|
---- header ----
|
|
require 'mediacloth/mediawikiast'
|
|
|
|
---- inner ----
|
|
|
|
attr_accessor :lexer
|
|
|
|
# Sets up the parser's bookkeeping state.
#
# @nodes collects the root nodes built by the grammar, @context is the
# stack of open *_START positions kept by next_token, and
# @wiki_ast_length is the running total of token lengths.
def initialize
  @nodes = []
  @context = []
  @wiki_ast_length = 0
  # The span of the most recent token or construct. Declared here so every
  # instance variable the grammar actions read exists from the start; the
  # value stays nil until next_token records a position.
  @ast_index = nil
  @ast_length = nil
  super
end
|
|
|
|
# Tokenizes the input string with the configured lexer and parses it.
#
# The position bookkeeping is reset on every call. Without the reset, a
# second parse on the same instance would add the new token lengths to the
# previous total, and any opener left on the context stack by a failed
# parse would be paired with the next closing token.
#
# input:: the text handed to lexer.tokenize
# Returns the last node on @nodes, which the wiki rule pushes when the
# whole input has been reduced.
def parse(input)
  # NOTE(review): presumably this only produces trace output when the
  # parser was generated with the racc debug option - confirm, and consider
  # making it configurable.
  @yydebug = true
  @context = []
  @wiki_ast_length = 0
  @ast_index = nil
  @ast_length = nil
  lexer.tokenize(input)
  do_parse
  @nodes.last
end
|
|
|
|
# Asks the lexer for the next token and records its source span.
#
# A token is read as [type, value, offset, length]. Only the first two
# elements are returned to the Racc driver. The last two maintain
# @ast_index and @ast_length, which the grammar actions pass to every AST
# node they create:
# * a *_START token pushes its [offset, length] onto @context;
# * a *_END token pops the matching opener, so the span runs from the
#   opener's offset to the end of the closing token;
# * any other token is its own span.
#
# A *_END token with no open construct falls back to its own offset rather
# than failing on nil, so the parser itself can report the syntax error.
# A token without a length (for instance a bare end-of-input pair) counts
# as zero towards the document length.
def next_token
  token = @lexer.lex
  kind = token[0].to_s.upcase
  offset = token[2]
  length = token[3] || 0

  if kind.end_with?("_START")
    @context << token[2..3]
  elsif kind.end_with?("_END")
    opener = @context.pop
    start = opener ? opener[0] : offset
    @ast_index = start
    @ast_length = offset + length - start
  else
    @ast_index = offset
    @ast_length = token[3]
  end

  @wiki_ast_length += length

  token[0..1]
end
|