ruby/test/racc/assets/machete.y

424 строки
10 KiB
Plaintext

# Copyright (c) 2011 SUSE
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
# Racc grammar for Machete patterns. The rule actions below build matcher
# objects (NodeMatcher, ArrayMatcher, LiteralMatcher, ...) out of the tokens
# returned by the hand-written lexer in the "inner" section.
class Machete::Parser
# Terminals with a dedicated token type; they are produced by the
# COMPLEX_TOKENS table of the lexer. Punctuation is referenced in the rules
# directly as quoted strings (e.g. "|", "<", "^=").
token NIL
token TRUE
token FALSE
token INTEGER
token SYMBOL
token STRING
token REGEXP
token ANY
token EVEN
token ODD
token METHOD_NAME
token CLASS_NAME
# A whole pattern is a single expression.
start expression
rule
# expression: one or more primaries separated by "|" (choice).
# The rule is left-recursive, so for "a | b | c" the left operand of the second
# "|" is already a ChoiceMatcher; in that case the new alternative is appended
# to its existing list (note: << modifies that list in place) instead of
# nesting one ChoiceMatcher inside another.
expression : primary
| expression "|" primary {
result = if val[0].is_a?(ChoiceMatcher)
ChoiceMatcher.new(val[0].alternatives << val[2])
else
ChoiceMatcher.new([val[0], val[2]])
end
}
# primary: the atomic pattern forms. No action is given, so the value of the
# single child rule is passed through unchanged (Racc's default action).
primary : node
| array
| literal
| any
# node: a class name, optionally followed by attribute constraints in angle
# brackets, e.g. Foo or Foo<a = 1, b = 2>. The class name is converted to a
# symbol; the attrs value (a Hash, see the attr rule) is passed as the second
# argument when present.
node : CLASS_NAME {
result = NodeMatcher.new(val[0].to_sym)
}
| CLASS_NAME "<" attrs ">" {
result = NodeMatcher.new(val[0].to_sym, val[2])
}
# attrs: comma-separated list of attr entries. Each attr yields a one-entry
# Hash; the hashes are merged left to right, so when the same attribute name
# appears twice the later entry wins (Hash#merge semantics).
attrs : attr
| attrs "," attr { result = val[0].merge(val[2]) }
# attr: a single attribute constraint. Every alternative produces a one-entry
# Hash mapping the attribute name (as a symbol) to a matcher or sub-pattern.
#
#   name =  expression     attribute must match the nested pattern
#   name ^= SYMBOL/STRING  value starts with the given text
#   name $= SYMBOL/STRING  value ends with the given text
#   name *= SYMBOL/STRING  value contains the given text
#   name *= REGEXP         value matches the given regular expression
#
# For the SYMBOL/STRING forms the literal text is passed through Regexp.escape,
# so it is matched verbatim; the "^" / "$" anchors are added around it.
attr : method_name "=" expression { result = { val[0].to_sym => val[2] } }
# Symbol operand: prefix match.
| method_name "^=" SYMBOL {
result = {
val[0].to_sym => SymbolRegexpMatcher.new(
Regexp.new("^" + Regexp.escape(symbol_value(val[2]).to_s))
)
}
}
# Symbol operand: suffix match.
| method_name "$=" SYMBOL {
result = {
val[0].to_sym => SymbolRegexpMatcher.new(
Regexp.new(Regexp.escape(symbol_value(val[2]).to_s) + "$")
)
}
}
# Symbol operand: substring match (no anchors).
| method_name "*=" SYMBOL {
result = {
val[0].to_sym => SymbolRegexpMatcher.new(
Regexp.new(Regexp.escape(symbol_value(val[2]).to_s))
)
}
}
# String operand: prefix match.
| method_name "^=" STRING {
result = {
val[0].to_sym => StringRegexpMatcher.new(
Regexp.new("^" + Regexp.escape(string_value(val[2])))
)
}
}
# String operand: suffix match.
| method_name "$=" STRING {
result = {
val[0].to_sym => StringRegexpMatcher.new(
Regexp.new(Regexp.escape(string_value(val[2])) + "$")
)
}
}
# String operand: substring match (no anchors).
| method_name "*=" STRING {
result = {
val[0].to_sym => StringRegexpMatcher.new(
Regexp.new(Regexp.escape(string_value(val[2])))
)
}
}
# Regexp operand: regexp_value already returns a Regexp, which is wrapped in
# Regexp.new once more before being handed to IndifferentRegexpMatcher.
| method_name "*=" REGEXP {
result = {
val[0].to_sym => IndifferentRegexpMatcher.new(
Regexp.new(regexp_value(val[2]))
)
}
}
# method_name: anything that may appear as an attribute name.
#
# This is a workaround for the lexer: reserved words (nil, true, false, any,
# even, odd) are returned as their own token types, and the operators "*", "+",
# "<", ">", "^" and "|" are returned as simple punctuation tokens rather than
# METHOD_NAME tokens. Listing them here lets all of them still be used as
# attribute names. No action is given, so the token text is passed through.
method_name : METHOD_NAME
| NIL
| TRUE
| FALSE
| ANY
| EVEN
| ODD
| "*"
| "+"
| "<"
| ">"
| "^"
| "|"
# array: a bracketed, possibly empty, list of items; val[1] is the Array built
# by items_opt.
array : "[" items_opt "]" { result = ArrayMatcher.new(val[1]) }
# items_opt: either nothing (yielding an empty Array) or a non-empty items list.
items_opt : /* empty */ { result = [] }
| items
# items: comma-separated list of item values, accumulated left to right into a
# single Array (each further item is appended with <<).
items : item { result = [val[0]] }
| items "," item { result = val[0] << val[2] }
# item: an array element, optionally followed by a quantifier. The quantifier
# rule yields a three-element Array which is splatted into Quantifier.new after
# the quantified expression.
item : expression
| expression quantifier { result = Quantifier.new(val[0], *val[1]) }
# quantifier: repetition suffix for an array item. Every alternative yields a
# three-element Array that item passes on to Quantifier.new.
# NOTE(review): the elements look like [minimum, maximum (nil = unbounded),
# step] -- confirm against Quantifier's constructor.
#
#   *        [0, nil, 1]        {n}     [n, n, 1]       {even}  [0, nil, 2]
#   +        [1, nil, 1]        {n,}    [n, nil, 1]     {odd}   [1, nil, 2]
#   ?        [0, 1, 1]          {,m}    [0, m, 1]
#                               {n,m}   [n, m, 1]
quantifier : "*" { result = [0, nil, 1] }
| "+" { result = [1, nil, 1] }
| "?" { result = [0, 1, 1] }
| "{" INTEGER "}" {
result = [integer_value(val[1]), integer_value(val[1]), 1]
}
| "{" INTEGER "," "}" {
result = [integer_value(val[1]), nil, 1]
}
| "{" "," INTEGER "}" {
result = [0, integer_value(val[2]), 1]
}
| "{" INTEGER "," INTEGER "}" {
result = [integer_value(val[1]), integer_value(val[3]), 1]
}
| "{" EVEN "}" { result = [0, nil, 2] }
| "{" ODD "}" { result = [1, nil, 2] }
# literal: a constant value. The token text is decoded into the corresponding
# Ruby object (nil/true/false directly, the rest through the *_value helpers
# of the inner section) and wrapped in a LiteralMatcher.
literal : NIL { result = LiteralMatcher.new(nil) }
| TRUE { result = LiteralMatcher.new(true) }
| FALSE { result = LiteralMatcher.new(false) }
| INTEGER { result = LiteralMatcher.new(integer_value(val[0])) }
| SYMBOL { result = LiteralMatcher.new(symbol_value(val[0])) }
| STRING { result = LiteralMatcher.new(string_value(val[0])) }
| REGEXP { result = LiteralMatcher.new(regexp_value(val[0])) }
# any: the "any" keyword, represented by an AnyMatcher.
any : ANY { result = AnyMatcher.new }
---- inner
# Makes the constants of the Matchers module resolvable from the rule actions
# without qualification (presumably that is where NodeMatcher, ChoiceMatcher
# etc. are defined -- the module itself is not part of this file).
include Matchers
# Error raised by the lexer (next_token) for an unrecognised character and by
# on_error for an unexpected token. Being nested in the parser class, inside
# this class the name refers to this StandardError subclass rather than to
# Ruby's built-in ::SyntaxError.
class SyntaxError < StandardError; end
# Parses a pattern.
#
# Stores the pattern text and resets the lexer position to the beginning,
# then hands control to do_parse, which pulls tokens through next_token.
# Returns whatever do_parse returns.
def parse(input)
  @input, @pos = input, 0
  do_parse
end
# The methods defined after this point (token value decoders, the lexer and the
# error hook) are private to the parser.
private
# Converts the text of an INTEGER token into an Integer.
#
# value - token text: an optional "+"/"-" sign followed by either a radix
#         prefix (0b, 0o, 0d, 0x in either case) and digits, a bare leading
#         zero (octal), or plain decimal digits. Underscores between digits
#         are accepted by String#to_i.
#
# The sign is split off before the radix is detected. The INTEGER token allows
# a sign in front of a prefix, and testing the prefix against the signed text
# would make e.g. "-0x1F" fall through to the decimal branch and yield 0.
#
# Returns the Integer value.
def integer_value(value)
  negative = value.start_with?("-")
  digits = value.sub(/\A[+-]/, "")

  magnitude =
    case digits
    when /\A0[bB]/ then digits[2..-1].to_i(2)
    when /\A0[oO]/ then digits[2..-1].to_i(8)
    when /\A0[dD]/ then digits[2..-1].to_i(10)
    when /\A0[xX]/ then digits[2..-1].to_i(16)
    when /\A0/     then digits.to_i(8)
    else                digits.to_i
    end

  negative ? -magnitude : magnitude
end
# Converts the text of a SYMBOL token (e.g. ":foo") into the Symbol it
# denotes by dropping the first character (the colon) and interning the rest.
def symbol_value(value)
  name = value.slice(1..-1)
  name.to_sym
end
# One-character escapes understood inside double-quoted STRING tokens, keyed
# by the character that follows the backslash.
DOUBLE_QUOTED_ESCAPES = {
  "\\" => "\\",
  '"'  => '"',
  "n"  => "\n",
  "t"  => "\t",
  "r"  => "\r",
  "f"  => "\f",
  "v"  => "\v",
  "a"  => "\a",
  "e"  => "\e",
  "b"  => "\b",
  "s"  => "\s"
}.freeze

# Converts the text of a STRING token (including its quotes) into the String
# it denotes.
#
# value - token text, starting and ending with the same quote character.
#         Single-quoted strings understand only the \\ and \' escapes.
#         Double-quoted strings understand the one-character escapes listed
#         in DOUBLE_QUOTED_ESCAPES, octal escapes (\N, \NN, \NNN) and
#         hexadecimal escapes (\xH, \xHH).
#
# All escapes are decoded in a single left-to-right pass. Decoding them with
# one substitution after another is incorrect: the backslash produced by
# decoding "\\" would be picked up by a later substitution, so the four
# characters \\n would end up as a newline instead of a backslash followed
# by "n".
#
# A backslash that does not start a recognised escape is kept as is.
#
# Returns the decoded String.
# Raises RuntimeError if the text does not start with ' or ".
def string_value(value)
  quote = value[0..0]

  case quote
  when "'"
    value[1..-2].gsub(/\\([\\'])/) { $1 }
  when '"'
    value[1..-2].gsub(/\\(?:([\\"ntrfvaebs])|([0-7]{1,3})|x([0-9a-fA-F]{1,2}))/) do
      if $1
        DOUBLE_QUOTED_ESCAPES[$1]
      elsif $2
        $2.to_i(8).chr
      else
        $3.to_i(16).chr
      end
    end
  else
    raise "Unknown quote: #{quote.inspect}."
  end
end
# Maps the flag letters allowed after a REGEXP token to Regexp option bits.
REGEXP_OPTIONS = {
  'i' => Regexp::IGNORECASE,
  'm' => Regexp::MULTILINE,
  'x' => Regexp::EXTENDED
}.freeze

# Converts the text of a REGEXP token (e.g. "/ab+c/i") into a Regexp.
#
# value - token text: "/", the pattern source, "/", then zero or more of the
#         flag letters i, m and x.
#
# The splitting pattern uses the /m flag so that "." also matches newlines;
# the REGEXP token may span several lines (typical with the x flag) and would
# otherwise not be recognised here. The greedy ".*" makes the *last* slash the
# closing delimiter, so escaped slashes inside the pattern stay in the source.
#
# Returns the Regexp built from the pattern source and the combined options
# (0, i.e. no options, when no flag letters are present).
def regexp_value(value)
  match = /\A\/(.*)\/([imx]*)\z/m.match(value)
  pattern, flags = match[1], match[2]
  options = flags.each_char.inject(0) { |bits, ch| bits | REGEXP_OPTIONS[ch] }
  Regexp.new(pattern, options)
end
# Punctuation tokens, recognised by comparing each entry in turn against the
# start of the remaining input. The order matters: a token that is a prefix of
# another one ("^" / "^=", "*" / "*=") must come after the longer token.
#
# "^" is listed here rather than among the operators accepted by METHOD_NAME;
# otherwise "^=" would be split into the two tokens "^" and "=".
SIMPLE_TOKENS = %w(
  | < > , =
  ^= ^ $=
  [ ]
  *= * + ?
  { }
)
# Token types recognised by regular expression, as [type, regexp] pairs. The
# lexer tries the pairs in order and uses the first regexp that matches the
# start of the remaining input, so the order of the entries is significant.
#
# Every regexp is anchored with \A (start of string). A line anchor (^) is not
# enough: it also matches right after any newline, which would let a token be
# "found" somewhere in the middle of a multi-line pattern.
#
# The keyword regexps (nil, true, false, any, even, odd) end with a negative
# lookahead for identifier characters, so that a longer name which merely
# starts with a keyword (e.g. "nilable", "anything", "odd_items") is lexed as
# one METHOD_NAME instead of a keyword followed by a stray name.
COMPLEX_TOKENS = [
  [:NIL,   /\Anil(?![a-zA-Z0-9_])/],
  [:TRUE,  /\Atrue(?![a-zA-Z0-9_])/],
  [:FALSE, /\Afalse(?![a-zA-Z0-9_])/],
  # INTEGER needs to be before METHOD_NAME, otherwise e.g. "+1" would be
  # recognized as two tokens.
  [
    :INTEGER,
    /\A
      [+-]?                               # sign
      (
        0[bB][01]+(_[01]+)*               # binary (prefixed)
        |
        0[oO][0-7]+(_[0-7]+)*             # octal (prefixed)
        |
        0[dD]\d+(_\d+)*                   # decimal (prefixed)
        |
        0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)* # hexadecimal (prefixed)
        |
        0[0-7]*(_[0-7]+)*                 # octal (unprefixed)
        |
        [1-9]\d*(_\d+)*                   # decimal (unprefixed)
      )
    /x
  ],
  [
    :SYMBOL,
    /\A
      :
      (
        # class name
        [A-Z][a-zA-Z0-9_]*
        |
        # regular method name
        [a-z_][a-zA-Z0-9_]*[?!=]?
        |
        # instance variable name
        @[a-zA-Z_][a-zA-Z0-9_]*
        |
        # class variable name
        @@[a-zA-Z_][a-zA-Z0-9_]*
        |
        # operator (sorted by length, then alphabetically)
        (<=>|===|\[\]=|\*\*|\+@|-@|<<|<=|==|=~|>=|>>|\[\]|[%&*+\-\/<>^`|~])
      )
    /x
  ],
  [
    :STRING,
    /\A
      (
        '                 # single-quoted string
          (
            \\[\\']       # escape
            |
            [^']          # regular character
          )*
        '
        |
        "                 # double-quoted string
          (
            \\            # escape
            (
              [\\"ntrfvaebs]      # one-character escape
              |
              [0-7]{1,3}          # octal number escape
              |
              x[0-9a-fA-F]{1,2}   # hexadecimal number escape
            )
            |
            [^"]          # regular character
          )*
        "
      )
    /x
  ],
  [
    :REGEXP,
    /\A
      \/
        (
          \\            # escape
          (
            [\\\/ntrfvaebs\(\)\[\]\{\}\-\.\?\*\+\|\^\$] # one-character escape
            |
            [0-7]{2,3}          # octal number escape
            |
            x[0-9a-fA-F]{1,2}   # hexadecimal number escape
          )
          |
          [^\/]         # regular character
        )*
      \/
      [imx]*
    /x
  ],
  # ANY, EVEN and ODD need to be before METHOD_NAME, otherwise they would be
  # recognized as method names.
  [:ANY,  /\Aany(?![a-zA-Z0-9_])/],
  [:EVEN, /\Aeven(?![a-zA-Z0-9_])/],
  [:ODD,  /\Aodd(?![a-zA-Z0-9_])/],
  # We exclude "*", "+", "<", ">", "^" and "|" from method names since they are
  # lexed as simple tokens. This is because they also have other meanings in
  # Machete patterns besides being Ruby method names.
  [
    :METHOD_NAME,
    /\A
      (
        # regular name
        [a-z_][a-zA-Z0-9_]*[?!=]?
        |
        # operator (sorted by length, then alphabetically)
        (<=>|===|\[\]=|\*\*|\+@|-@|<<|<=|==|=~|>=|>>|\[\]|[%&\-\/`~])
      )
    /x
  ],
  [:CLASS_NAME, /\A[A-Z][a-zA-Z0-9_]*/]
]
# Lexer entry point called by the Racc runtime: returns the next token of the
# pattern and advances @pos past it.
#
# Returns false at the end of input, otherwise a [type, text] pair. For
# complex tokens the type is the Symbol from COMPLEX_TOKENS; for simple tokens
# both elements are the token text itself.
#
# Raises SyntaxError if the input at the current position does not start with
# any known token.
def next_token
  skip_whitespace

  # Take one snapshot of the unread input instead of re-slicing it for every
  # candidate token.
  input = remaining_input
  return false if input.empty?

  # Complex tokens need to be before simple tokens, otherwise e.g. "<<" would be
  # recognized as two tokens.
  COMPLEX_TOKENS.each do |type, regexp|
    match = regexp.match(input)
    # Only a match that starts at the current position counts. A regexp
    # anchored with "^" also matches after a newline further on in the input;
    # accepting such a match would return a token from a later line while
    # advancing @pos by its length only.
    next unless match && match.begin(0) == 0

    lexeme = match[0]
    @pos += lexeme.length
    return [type, lexeme]
  end

  SIMPLE_TOKENS.each do |token|
    next unless input.start_with?(token)

    @pos += token.length
    return [token, token]
  end

  raise SyntaxError, "Unexpected character: #{input[0..0].inspect}."
end
# Advances @pos past any run of spaces, tabs, carriage returns and newlines at
# the start of the remaining input; does nothing when there is none.
def skip_whitespace
  leading = remaining_input[/\A^[ \t\r\n]+/]
  @pos += leading.length if leading
end
# Returns the part of the pattern that has not been consumed yet, i.e. the
# text from @pos to the end of @input.
def remaining_input
  @input.slice(@pos..-1)
end
# Racc error hook, invoked when the parser meets a token that no rule allows.
#
# error_token_id - id of the offending token (unused).
# error_value    - text of the offending token; shown in the error message.
# value_stack    - the parser's value stack (unused).
#
# Always raises SyntaxError.
def on_error(error_token_id, error_value, value_stack)
  message = "Unexpected token: #{error_value.inspect}."
  raise SyntaxError, message
end