ruby/lib/rdoc/markdown/literals.rb

456 строки
10 KiB
Ruby

# coding: UTF-8
# frozen_string_literal: true
# :markup: markdown
##
#--
# This set of literals is for Ruby 1.9 regular expressions and gives full
# unicode support.
#
# Unlike peg-markdown, this set of literals recognizes Unicode alphanumeric
# characters, newlines and spaces.
class RDoc::Markdown::Literals
# :stopdoc:
# This is distinct from setup_parser so that a standalone parser
# can redefine #initialize and still have access to the proper
# parser setup code.
def initialize(str, debug=false)
setup_parser(str, debug)
end
# Prepares for parsing +str+. If you define a custom initialize you must
# call this method before #parse
def setup_parser(str, debug=false)
set_string str, 0
@memoizations = Hash.new { |h,k| h[k] = {} }
@result = nil
@failed_rule = nil
@failing_rule_offset = -1
@line_offsets = nil
setup_foreign_grammar
end
attr_reader :string
attr_reader :failing_rule_offset
attr_accessor :result, :pos
def current_column(target=pos)
if string[target] == "\n" && (c = string.rindex("\n", target-1) || -1)
return target - c
elsif c = string.rindex("\n", target)
return target - c
end
target + 1
end
def position_line_offsets
unless @position_line_offsets
@position_line_offsets = []
total = 0
string.each_line do |line|
total += line.size
@position_line_offsets << total
end
end
@position_line_offsets
end
if [].respond_to? :bsearch_index
def current_line(target=pos)
if line = position_line_offsets.bsearch_index {|x| x > target }
return line + 1
end
raise "Target position #{target} is outside of string"
end
else
def current_line(target=pos)
if line = position_line_offsets.index {|x| x > target }
return line + 1
end
raise "Target position #{target} is outside of string"
end
end
def current_character(target=pos)
if target < 0 || target >= string.size
raise "Target position #{target} is outside of string"
end
string[target, 1]
end
KpegPosInfo = Struct.new(:pos, :lno, :col, :line, :char)
def current_pos_info(target=pos)
l = current_line target
c = current_column target
ln = get_line(l-1)
chr = string[target,1]
KpegPosInfo.new(target, l, c, ln, chr)
end
def lines
string.lines
end
def get_line(no)
loff = position_line_offsets
if no < 0
raise "Line No is out of range: #{no} < 0"
elsif no >= loff.size
raise "Line No is out of range: #{no} >= #{loff.size}"
end
lend = loff[no]-1
lstart = no > 0 ? loff[no-1] : 0
string[lstart..lend]
end
def get_text(start)
@string[start..@pos-1]
end
# Sets the string and current parsing position for the parser.
def set_string string, pos
@string = string
@string_size = string ? string.size : 0
@pos = pos
@position_line_offsets = nil
end
def show_pos
width = 10
if @pos < width
"#{@pos} (\"#{@string[0,@pos]}\" @ \"#{@string[@pos,width]}\")"
else
"#{@pos} (\"... #{@string[@pos - width, width]}\" @ \"#{@string[@pos,width]}\")"
end
end
def failure_info
l = current_line @failing_rule_offset
c = current_column @failing_rule_offset
if @failed_rule.kind_of? Symbol
info = self.class::Rules[@failed_rule]
"line #{l}, column #{c}: failed rule '#{info.name}' = '#{info.rendered}'"
else
"line #{l}, column #{c}: failed rule '#{@failed_rule}'"
end
end
def failure_caret
p = current_pos_info @failing_rule_offset
"#{p.line.chomp}\n#{' ' * (p.col - 1)}^"
end
def failure_character
current_character @failing_rule_offset
end
def failure_oneline
p = current_pos_info @failing_rule_offset
if @failed_rule.kind_of? Symbol
info = self.class::Rules[@failed_rule]
"@#{p.lno}:#{p.col} failed rule '#{info.name}', got '#{p.char}'"
else
"@#{p.lno}:#{p.col} failed rule '#{@failed_rule}', got '#{p.char}'"
end
end
class ParseError < RuntimeError
end
def raise_error
raise ParseError, failure_oneline
end
def show_error(io=STDOUT)
error_pos = @failing_rule_offset
p = current_pos_info(error_pos)
io.puts "On line #{p.lno}, column #{p.col}:"
if @failed_rule.kind_of? Symbol
info = self.class::Rules[@failed_rule]
io.puts "Failed to match '#{info.rendered}' (rule '#{info.name}')"
else
io.puts "Failed to match rule '#{@failed_rule}'"
end
io.puts "Got: #{p.char.inspect}"
io.puts "=> #{p.line}"
io.print(" " * (p.col + 2))
io.puts "^"
end
def set_failed_rule(name)
if @pos > @failing_rule_offset
@failed_rule = name
@failing_rule_offset = @pos
end
end
attr_reader :failed_rule
def match_string(str)
len = str.size
if @string[pos,len] == str
@pos += len
return str
end
return nil
end
def scan(reg)
if m = reg.match(@string, @pos)
@pos = m.end(0)
return true
end
return nil
end
if "".respond_to? :ord
def get_byte
if @pos >= @string_size
return nil
end
s = @string[@pos].ord
@pos += 1
s
end
else
def get_byte
if @pos >= @string_size
return nil
end
s = @string[@pos]
@pos += 1
s
end
end
def parse(rule=nil)
# We invoke the rules indirectly via apply
# instead of by just calling them as methods because
# if the rules use left recursion, apply needs to
# manage that.
if !rule
apply(:_root)
else
method = rule.gsub("-","_hyphen_")
apply :"_#{method}"
end
end
class MemoEntry
def initialize(ans, pos)
@ans = ans
@pos = pos
@result = nil
@set = false
@left_rec = false
end
attr_reader :ans, :pos, :result, :set
attr_accessor :left_rec
def move!(ans, pos, result)
@ans = ans
@pos = pos
@result = result
@set = true
@left_rec = false
end
end
def external_invoke(other, rule, *args)
old_pos = @pos
old_string = @string
set_string other.string, other.pos
begin
if val = __send__(rule, *args)
other.pos = @pos
other.result = @result
else
other.set_failed_rule "#{self.class}##{rule}"
end
val
ensure
set_string old_string, old_pos
end
end
def apply_with_args(rule, *args)
@result = nil
memo_key = [rule, args]
if m = @memoizations[memo_key][@pos]
@pos = m.pos
if !m.set
m.left_rec = true
return nil
end
@result = m.result
return m.ans
else
m = MemoEntry.new(nil, @pos)
@memoizations[memo_key][@pos] = m
start_pos = @pos
ans = __send__ rule, *args
lr = m.left_rec
m.move! ans, @pos, @result
# Don't bother trying to grow the left recursion
# if it's failing straight away (thus there is no seed)
if ans and lr
return grow_lr(rule, args, start_pos, m)
else
return ans
end
end
end
def apply(rule)
@result = nil
if m = @memoizations[rule][@pos]
@pos = m.pos
if !m.set
m.left_rec = true
return nil
end
@result = m.result
return m.ans
else
m = MemoEntry.new(nil, @pos)
@memoizations[rule][@pos] = m
start_pos = @pos
ans = __send__ rule
lr = m.left_rec
m.move! ans, @pos, @result
# Don't bother trying to grow the left recursion
# if it's failing straight away (thus there is no seed)
if ans and lr
return grow_lr(rule, nil, start_pos, m)
else
return ans
end
end
end
def grow_lr(rule, args, start_pos, m)
while true
@pos = start_pos
@result = m.result
if args
ans = __send__ rule, *args
else
ans = __send__ rule
end
return nil unless ans
break if @pos <= m.pos
m.move! ans, @pos, @result
end
@result = m.result
@pos = m.pos
return m.ans
end
class RuleInfo
def initialize(name, rendered)
@name = name
@rendered = rendered
end
attr_reader :name, :rendered
end
def self.rule_info(name, rendered)
RuleInfo.new(name, rendered)
end
# :startdoc:
# :stopdoc:
def setup_foreign_grammar; end
# Alphanumeric = /\p{Word}/
def _Alphanumeric
_tmp = scan(/\G(?-mix:\p{Word})/)
set_failed_rule :_Alphanumeric unless _tmp
return _tmp
end
# AlphanumericAscii = /[A-Za-z0-9]/
def _AlphanumericAscii
_tmp = scan(/\G(?-mix:[A-Za-z0-9])/)
set_failed_rule :_AlphanumericAscii unless _tmp
return _tmp
end
# BOM = "uFEFF"
def _BOM
_tmp = match_string("uFEFF")
set_failed_rule :_BOM unless _tmp
return _tmp
end
# Newline = /\n|\r\n?|\p{Zl}|\p{Zp}/
def _Newline
_tmp = scan(/\G(?-mix:\n|\r\n?|\p{Zl}|\p{Zp})/)
set_failed_rule :_Newline unless _tmp
return _tmp
end
# NonAlphanumeric = /\p{^Word}/
def _NonAlphanumeric
_tmp = scan(/\G(?-mix:\p{^Word})/)
set_failed_rule :_NonAlphanumeric unless _tmp
return _tmp
end
# Spacechar = /\t|\p{Zs}/
def _Spacechar
_tmp = scan(/\G(?-mix:\t|\p{Zs})/)
set_failed_rule :_Spacechar unless _tmp
return _tmp
end
Rules = {}
Rules[:_Alphanumeric] = rule_info("Alphanumeric", "/\\p{Word}/")
Rules[:_AlphanumericAscii] = rule_info("AlphanumericAscii", "/[A-Za-z0-9]/")
Rules[:_BOM] = rule_info("BOM", "\"uFEFF\"")
Rules[:_Newline] = rule_info("Newline", "/\\n|\\r\\n?|\\p{Zl}|\\p{Zp}/")
Rules[:_NonAlphanumeric] = rule_info("NonAlphanumeric", "/\\p{^Word}/")
Rules[:_Spacechar] = rule_info("Spacechar", "/\\t|\\p{Zs}/")
# :startdoc:
end