From 94b740b2499242e1aca67f7bbf595e75e63abc40 Mon Sep 17 00:00:00 2001 From: aycabta Date: Sun, 28 Apr 2019 03:41:06 +0900 Subject: [PATCH] Use Ripper for IRB The debug option of IRB is deleted because it's just for IRB's pure Ruby parser. --- doc/irb/irb.rd.ja | 6 - lib/irb.rb | 3 - lib/irb/context.rb | 20 - lib/irb/init.rb | 4 - lib/irb/lc/help-message | 1 - lib/irb/lc/ja/help-message | 2 - lib/irb/ruby-lex.rb | 1283 ++++++------------------------------ test/irb/test_ruby-lex.rb | 108 --- 8 files changed, 204 insertions(+), 1223 deletions(-) delete mode 100644 test/irb/test_ruby-lex.rb diff --git a/doc/irb/irb.rd.ja b/doc/irb/irb.rd.ja index 85b6536ee4..0522b3fa3d 100644 --- a/doc/irb/irb.rd.ja +++ b/doc/irb/irb.rd.ja @@ -70,8 +70,6 @@ irbの使い方は, Rubyさえ知っていればいたって簡単です. 基本 --back-trace-limit n バックトレース表示をバックトレースの頭から n, 後ろ からnだけ行なう. デフォルトは16 - --irb_debug n irbのデバッグデバッグレベルをnに設定する(利用しな - い方が無難でしょう). -v, --version irbのバージョンを表示する = コンフィギュレーション @@ -97,7 +95,6 @@ irb起動時に``~/.irbrc''を読み込みます. もし存在しない場合は IRB.conf[:IGNORE_EOF] = false IRB.conf[:PROMPT_MODE] = :DEFAULT IRB.conf[:PROMPT] = {...} - IRB.conf[:DEBUG_LEVEL]=0 IRB.conf[:VERBOSE]=true == プロンプトの設定 @@ -183,9 +180,6 @@ irb拡張コマンドは, 簡単な名前と頭に`irb_'をつけた名前と両 バックトレース表示をバックトレースの頭からn, 後ろからnだけ行なう. デフォルトは16 ---- conf.debug_level = N - irb用のデバッグレベルの設定 - --- conf.ignore_eof = true/false ^Dが入力された時の動作を設定する. trueの時は^Dを無視する, falseの 時はirbを終了する. diff --git a/lib/irb.rb b/lib/irb.rb index ba12bdbcab..d0246b077c 100644 --- a/lib/irb.rb +++ b/lib/irb.rb @@ -74,7 +74,6 @@ require "irb/version" # --back-trace-limit n # Display backtrace top n and tail n. The default # value is 16. 
-# --irb_debug n Set internal debug level to n (not for popular use) # -v, --version Print the version of irb # # == Configuration @@ -102,7 +101,6 @@ require "irb/version" # IRB.conf[:IGNORE_EOF] = false # IRB.conf[:PROMPT_MODE] = :DEFAULT # IRB.conf[:PROMPT] = {...} -# IRB.conf[:DEBUG_LEVEL]=0 # # === Auto indentation # @@ -413,7 +411,6 @@ module IRB @signal_status = :IN_IRB @scanner = RubyLex.new - @scanner.exception_on_syntax_error = false end def run(conf = IRB.conf) diff --git a/lib/irb/context.rb b/lib/irb/context.rb index e8e6a118e6..f8a6009d17 100644 --- a/lib/irb/context.rb +++ b/lib/irb/context.rb @@ -101,7 +101,6 @@ module IRB if @echo.nil? @echo = true end - self.debug_level = IRB.conf[:DEBUG_LEVEL] end # The top-level workspace, see WorkSpace#main @@ -211,10 +210,6 @@ module IRB # # A copy of the default IRB.conf[:VERBOSE] attr_accessor :verbose - # The debug level of irb - # - # See #debug_level= for more information. - attr_reader :debug_level # The limit of backtrace lines displayed as top +n+ and tail +n+. # @@ -361,21 +356,6 @@ module IRB print "Do nothing." end - # Sets the debug level of irb - # - # Can also be set using the +--irb_debug+ command line option. - # - # See IRB@Command+line+options for more command line options. - def debug_level=(value) - @debug_level = value - RubyLex.debug_level = value - end - - # Whether or not debug mode is enabled, see #debug_level=. - def debug? 
- @debug_level > 0 - end - def evaluate(line, line_no, exception: nil) # :nodoc: @line_no = line_no if exception diff --git a/lib/irb/init.rb b/lib/irb/init.rb index 2066d8cb64..344b243f12 100644 --- a/lib/irb/init.rb +++ b/lib/irb/init.rb @@ -112,8 +112,6 @@ module IRB # :nodoc: @CONF[:LC_MESSAGES] = Locale.new @CONF[:AT_EXIT] = [] - - @CONF[:DEBUG_LEVEL] = 0 end def IRB.init_error @@ -191,8 +189,6 @@ module IRB # :nodoc: @CONF[:CONTEXT_MODE] = ($1 || argv.shift).to_i when "--single-irb" @CONF[:SINGLE_IRB] = true - when /^--irb_debug(?:=(.+))?/ - @CONF[:DEBUG_LEVEL] = ($1 || argv.shift).to_i when "-v", "--version" print IRB.version, "\n" exit 0 diff --git a/lib/irb/lc/help-message b/lib/irb/lc/help-message index d43c6a1695..d1a66dddda 100644 --- a/lib/irb/lc/help-message +++ b/lib/irb/lc/help-message @@ -39,7 +39,6 @@ Usage: irb.rb [options] [programfile] [arguments] --back-trace-limit n Display backtrace top n and tail n. The default value is 16. - --irb_debug n Set internal debug level to n (not for popular use) --verbose Show details --noverbose Don't show details -v, --version Print the version of irb diff --git a/lib/irb/lc/ja/help-message b/lib/irb/lc/ja/help-message index 1b24d14d28..7a15f973c6 100644 --- a/lib/irb/lc/ja/help-message +++ b/lib/irb/lc/ja/help-message @@ -41,8 +41,6 @@ Usage: irb.rb [options] [programfile] [arguments] バックトレース表示をバックトレースの頭から n, 後ろ からnだけ行なう. デフォルトは16 - --irb_debug n irbのデバッグレベルをnに設定する(非推奨). - --verbose 詳細なメッセージを出力する. --noverbose 詳細なメッセージを出力しない(デフォルト). -v, --version irbのバージョンを表示する. 
diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb index 555d1f024f..c4bec4a854 100644 --- a/lib/irb/ruby-lex.rb +++ b/lib/irb/ruby-lex.rb @@ -11,73 +11,39 @@ # require "e2mmap" -require_relative "slex" -require_relative "ruby-token" +require "ripper" # :stopdoc: class RubyLex extend Exception2MessageMapper - def_exception(:AlreadyDefinedToken, "Already defined token(%s)") - def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')") - def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')") - def_exception(:TkReading2TokenDuplicateError, - "key duplicate(token_n='%s', key='%s')") - def_exception(:SyntaxError, "%s") - def_exception(:TerminateLineInput, "Terminate Line Input") - include RubyToken - - class << self - attr_accessor :debug_level - def debug? - @debug_level > 0 - end - end - @debug_level = 0 - def initialize - lex_init - set_input(STDIN) - - @seek = 0 @exp_line_no = @line_no = 1 - @base_char_no = 0 - @char_no = 0 - @rests = [] - @readed = [] - @here_readed = [] - @indent = 0 - @indent_stack = [] - @lex_state = EXPR_BEG - @space_seen = false - @here_header = false - @post_symbeg = false - @continue = false @line = "" - - @skip_space = false - @readed_auto_clean_up = false - @exception_on_syntax_error = true - @prompt = nil end - attr_accessor :skip_space - attr_accessor :readed_auto_clean_up - attr_accessor :exception_on_syntax_error - - attr_reader :seek - attr_reader :char_no - attr_reader :line_no - attr_reader :indent - # io functions def set_input(io, p = nil, &block) @io = io + if @io.respond_to?(:check_termination) + @io.check_termination do |code| + @tokens = Ripper.lex(code) + continue = process_continue + code_block_open = check_code_block(code) + indent = process_nesting_level + ltype = process_literal_type + if code_block_open or ltype or continue or indent > 0 + false + else + true + end + end + end if p.respond_to?(:call) @input = p elsif block_given? 
@@ -87,112 +53,6 @@ class RubyLex end end - def get_readed - if idx = @readed.rindex("\n") - @base_char_no = @readed.size - (idx + 1) - else - @base_char_no += @readed.size - end - - readed = @readed.join("") - @readed = [] - readed - end - - def getc - while @rests.empty? - @rests.push nil unless buf_input - end - c = @rests.shift - if @here_header - @here_readed.push c - else - @readed.push c - end - @seek += 1 - if c == "\n" - @line_no += 1 - @char_no = 0 - else - @char_no += 1 - end - c - end - - def gets - l = "" - while c = getc - l.concat(c) - break if c == "\n" - end - return nil if l == "" and c.nil? - l - end - - def eof? - @io.eof? - end - - def getc_of_rests - if @rests.empty? - nil - else - getc - end - end - - def ungetc(c = nil) - if @here_readed.empty? - c2 = @readed.pop - else - c2 = @here_readed.pop - end - c = c2 unless c - @rests.unshift c #c = - @seek -= 1 - if c == "\n" - @line_no -= 1 - if idx = @readed.rindex("\n") - @char_no = idx + 1 - else - @char_no = @base_char_no + @readed.size - end - else - @char_no -= 1 - end - end - - def peek_equal?(str) - chrs = str.split(//) - until @rests.size >= chrs.size - return false unless buf_input - end - @rests[0, chrs.size] == chrs - end - - def peek_match?(regexp) - while @rests.empty? - return false unless buf_input - end - regexp =~ @rests.join("") - end - - def peek(i = 0) - while @rests.size <= i - return nil unless buf_input - end - @rests[i] - end - - def buf_input - prompt - line = @input.call - return nil unless line - @rests.concat line.chars.to_a - true - end - private :buf_input - def set_prompt(p = nil, &block) p = block if block_given? 
if p.respond_to?(:call) @@ -210,20 +70,11 @@ class RubyLex def initialize_input @ltype = nil - @quoted = nil @indent = 0 - @indent_stack = [] - @lex_state = EXPR_BEG - @space_seen = false - @here_header = false - @continue = false - @post_symbeg = false - - prompt - @line = "" @exp_line_no = @line_no + @code_block_open = false end def each_top_level_statement @@ -231,13 +82,14 @@ class RubyLex catch(:TERM_INPUT) do loop do begin - @continue = false prompt unless l = lex throw :TERM_INPUT if @line == '' else + @line_no += 1 + next if l == "\n" @line.concat l - if @ltype or @continue or @indent > 0 + if @code_block_open or @ltype or @continue or @indent > 0 next end end @@ -250,930 +102,203 @@ class RubyLex @exp_line_no = @line_no @indent = 0 - @indent_stack = [] - prompt rescue TerminateLineInput initialize_input prompt - get_readed end end end end def lex - continue = @continue - while tk = token - case tk - when TkNL, TkEND_OF_SCRIPT - @continue = continue unless continue.nil? - break unless @continue - when TkSPACE, TkCOMMENT - when TkSEMICOLON, TkBEGIN, TkELSE - @continue = continue = false - else - continue = nil + line = @input.call + if @io.respond_to?(:check_termination) + return line # multiline + end + code = @line + (line.nil? ? '' : line) + code.gsub!(/\n*$/, '').concat("\n") + @tokens = Ripper.lex(code) + @continue = process_continue + @code_block_open = check_code_block(code) + @indent = process_nesting_level + @ltype = process_literal_type + line + end + + def process_continue + continued_bits = Ripper::EXPR_BEG | Ripper::EXPR_FNAME | Ripper::EXPR_DOT + # last token is always newline + if @tokens.size >= 2 and @tokens[-2][1] == :on_regexp_end + # end of regexp literal + return false + elsif @tokens.size >= 2 and @tokens[-2][1] == :on_semicolon + return false + elsif @tokens.size >= 2 and @tokens[-2][1] == :on_kw and (@tokens[-2][2] == 'begin' or @tokens[-2][2] == 'else') + return false + elsif !@tokens.empty? 
and @tokens.last[2] == "\\\n" + return true + elsif @tokens.size >= 2 and @tokens[-2][3].anybits?(continued_bits) + # end of literal except for regexp + return true + end + false + end + + def check_code_block(code) + return true if @tokens.empty? + if @tokens.last[1] == :on_heredoc_beg + return true + end + + begin # check if parser error are available + RubyVM::InstructionSequence.compile(code) + rescue SyntaxError => e + case e.message + when /unterminated (?:string|regexp) meets end of file/ + # "unterminated regexp meets end of file" + # + # example: + # / + # + # "unterminated string meets end of file" + # + # example: + # ' + return true + when /syntax error, unexpected end-of-input/ + # "syntax error, unexpected end-of-input, expecting keyword_end" + # + # example: + # if ture + # hoge + # if false + # fuga + # end + return true + when /syntax error, unexpected keyword_end/ + # "syntax error, unexpected keyword_end" + # + # example: + # if ( + # end + # + # example: + # end + return false + when /unexpected tREGEXP_BEG/ + # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('" + # + # example: + # method / f / + return false end end - line = get_readed - if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil? 
+ + last_lex_state = @tokens.last[3] + if last_lex_state.allbits?(Ripper::EXPR_BEG) + return false + elsif last_lex_state.allbits?(Ripper::EXPR_DOT) + return true + elsif last_lex_state.allbits?(Ripper::EXPR_CLASS) + return true + elsif last_lex_state.allbits?(Ripper::EXPR_FNAME) + return true + elsif last_lex_state.allbits?(Ripper::EXPR_VALUE) + return true + elsif last_lex_state.allbits?(Ripper::EXPR_ARG) + return false + end + + false + end + + def process_nesting_level + @tokens.inject(0) { |indent, t| + case t[1] + when :on_lbracket, :on_lbrace, :on_lparen + indent += 1 + when :on_rbracket, :on_rbrace, :on_rparen + indent -= 1 + when :on_kw + case t[2] + when 'def', 'do', 'case', 'for', 'begin', 'class', 'module' + indent += 1 + when 'if', 'unless', 'while', 'until', 'rescue' + # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL + indent += 1 unless t[3].allbits?(Ripper::EXPR_LABEL) + when 'end' + indent -= 1 + end + end + # percent literals are not indented + indent + } + end + + def check_string_literal + i = 0 + start_token = [] + end_type = [] + while i < @tokens.size + t = @tokens[i] + case t[1] + when :on_tstring_beg + start_token << t + end_type << :on_tstring_end + when :on_regexp_beg + start_token << t + end_type << :on_regexp_end + when :on_symbeg + if (i + 1) < @tokens.size and @tokens[i + 1][1] != :on_ident + start_token << t + end_type << :on_tstring_end + end + when :on_backtick + start_token << t + end_type << :on_tstring_end + when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg + start_token << t + end_type << :on_tstring_end + when :on_heredoc_beg + start_token << t + end_type << :on_heredoc_end + when end_type.last + start_token.pop + end_type.pop + end + i += 1 + end + start_token.last.nil? ? '' : start_token.last + end + + def process_literal_type + start_token = check_string_literal + case start_token[1] + when :on_tstring_beg + case start_token[2] + when ?" then ?" + when /^%.$/ then ?" 
+ when /^%Q.$/ then ?" + when ?' then ?' + when /^%q.$/ then ?' + end + when :on_regexp_beg then ?/ + when :on_symbeg then ?: + when :on_backtick then ?` + when :on_qwords_beg then ?] + when :on_words_beg then ?] + when :on_qsymbols_beg then ?] + when :on_symbols_beg then ?] + when :on_heredoc_beg + start_token[2] =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/ + case $1 + when ?" then ?" + when ?' then ?' + when ?` then ?` + else ?" + end + else nil - else - line - end - end - - def token - @prev_seek = @seek - @prev_line_no = @line_no - @prev_char_no = @char_no - begin - begin - tk = @OP.match(self) - @space_seen = tk.kind_of?(TkSPACE) - @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp) - @post_symbeg = tk.kind_of?(TkSYMBEG) - rescue SyntaxError - raise if @exception_on_syntax_error - tk = TkError.new(@seek, @line_no, @char_no) - end - end while @skip_space and tk.kind_of?(TkSPACE) - if @readed_auto_clean_up - get_readed - end - tk - end - - ENINDENT_CLAUSE = [ - "case", "class", "def", "do", "for", "if", - "module", "unless", "until", "while", "begin" - ] - DEINDENT_CLAUSE = ["end" - ] - - PERCENT_LTYPE = { - "q" => "\'", - "Q" => "\"", - "x" => "\`", - "r" => "/", - "w" => "]", - "W" => "]", - "i" => "]", - "I" => "]", - "s" => ":" - } - - PERCENT_PAREN = { - "{" => "}", - "[" => "]", - "<" => ">", - "(" => ")" - } - - Ltype2Token = { - "\'" => TkSTRING, - "\"" => TkSTRING, - "\`" => TkXSTRING, - "/" => TkREGEXP, - "]" => TkDSTRING, - ":" => TkSYMBOL - } - DLtype2Token = { - "\"" => TkDSTRING, - "\`" => TkDXSTRING, - "/" => TkDREGEXP, - } - - def lex_init() - @OP = IRB::SLex.new - @OP.def_rules("\0", "\004", "\032") do |op, io| - Token(TkEND_OF_SCRIPT) - end - - @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io| - @space_seen = true - while getc =~ /[ \t\f\r\13]/; end - ungetc - Token(TkSPACE) - end - - @OP.def_rule("#") do |op, io| - identify_comment - end - - @OP.def_rule("=begin", - proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do - |op, io| - 
@ltype = "=" - until getc == "\n"; end - until peek_equal?("=end") && peek(4) =~ /\s/ - until getc == "\n"; end - end - gets - @ltype = nil - Token(TkRD_COMMENT) - end - - @OP.def_rule("\n") do |op, io| - print "\\n\n" if RubyLex.debug? - case @lex_state - when EXPR_BEG, EXPR_FNAME, EXPR_DOT - @continue = true - else - @continue = false - @lex_state = EXPR_BEG - until (@indent_stack.empty? || - [TkLPAREN, TkLBRACK, TkLBRACE, - TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) - @indent_stack.pop - end - end - @here_header = false - @here_readed = [] - Token(TkNL) - end - - @OP.def_rules("*", "**", - "=", "==", "===", - "=~", "<=>", - "<", "<=", - ">", ">=", ">>", - "!", "!=", "!~") do - |op, io| - case @lex_state - when EXPR_FNAME, EXPR_DOT - @lex_state = EXPR_ARG - else - @lex_state = EXPR_BEG - end - Token(op) - end - - @OP.def_rules("<<") do - |op, io| - tk = nil - if @lex_state != EXPR_END && @lex_state != EXPR_CLASS && - (@lex_state != EXPR_ARG || @space_seen) - c = peek(0) - if /[-~"'`\w]/ =~ c - tk = identify_here_document - end - end - unless tk - tk = Token(op) - case @lex_state - when EXPR_FNAME, EXPR_DOT - @lex_state = EXPR_ARG - else - @lex_state = EXPR_BEG - end - end - tk - end - - @OP.def_rules("'", '"') do - |op, io| - identify_string(op) - end - - @OP.def_rules("`") do - |op, io| - if @lex_state == EXPR_FNAME - @lex_state = EXPR_END - Token(op) - else - identify_string(op) - end - end - - @OP.def_rules('?') do - |op, io| - if @lex_state == EXPR_END - @lex_state = EXPR_BEG - Token(TkQUESTION) - else - ch = getc - if @lex_state == EXPR_ARG && ch =~ /\s/ - ungetc - @lex_state = EXPR_BEG; - Token(TkQUESTION) - else - if (ch == '\\') - read_escape - end - @lex_state = EXPR_END - Token(TkINTEGER) - end - end - end - - @OP.def_rules("&", "&&", "|", "||") do - |op, io| - @lex_state = EXPR_BEG - Token(op) - end - - @OP.def_rules("+=", "-=", "*=", "**=", - "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do - |op, io| - @lex_state = EXPR_BEG - op 
=~ /^(.*)=$/ - Token(TkOPASGN, $1) - end - - @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do - |op, io| - @lex_state = EXPR_ARG - Token(op) - end - - @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do - |op, io| - @lex_state = EXPR_ARG - Token(op) - end - - @OP.def_rules("+", "-") do - |op, io| - catch(:RET) do - if @lex_state == EXPR_ARG - if @space_seen and peek(0) =~ /[0-9]/ - throw :RET, identify_number - else - @lex_state = EXPR_BEG - end - elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/ - throw :RET, identify_number - else - @lex_state = EXPR_BEG - end - Token(op) - end - end - - @OP.def_rule(".") do - |op, io| - @lex_state = EXPR_BEG - if peek(0) =~ /[0-9]/ - ungetc - identify_number - else - # for "obj.if" etc. - @lex_state = EXPR_DOT - Token(TkDOT) - end - end - - @OP.def_rules("..", "...") do - |op, io| - @lex_state = EXPR_BEG - Token(op) - end - - lex_int2 - end - - def lex_int2 - @OP.def_rules("]", "}", ")") do - |op, io| - @lex_state = EXPR_END - @indent -= 1 - @indent_stack.pop - Token(op) - end - - @OP.def_rule(":") do - |op, io| - if @lex_state == EXPR_END || peek(0) =~ /\s/ - @lex_state = EXPR_BEG - Token(TkCOLON) - else - @lex_state = EXPR_FNAME - Token(TkSYMBEG) - end - end - - @OP.def_rule("::") do - |op, io| - if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen - @lex_state = EXPR_BEG - Token(TkCOLON3) - else - @lex_state = EXPR_DOT - Token(TkCOLON2) - end - end - - @OP.def_rule("/") do - |op, io| - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID - identify_string(op) - elsif peek(0) == '=' - getc - @lex_state = EXPR_BEG - Token(TkOPASGN, "/") #/) - elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ - identify_string(op) - else - @lex_state = EXPR_BEG - Token("/") #/) - end - end - - @OP.def_rules("^") do - |op, io| - @lex_state = EXPR_BEG - Token("^") - end - - @OP.def_rules(",") do - |op, io| - @lex_state = EXPR_BEG - Token(op) - end - - @OP.def_rules(";") do - |op, io| - 
@lex_state = EXPR_BEG - until (@indent_stack.empty? || - [TkLPAREN, TkLBRACK, TkLBRACE, - TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) - @indent_stack.pop - end - Token(op) - end - - @OP.def_rule("~") do - |op, io| - @lex_state = EXPR_BEG - Token("~") - end - - @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do - |op, io| - @lex_state = EXPR_BEG - Token("~") - end - - @OP.def_rule("(") do - |op, io| - @indent += 1 - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID - @lex_state = EXPR_BEG - tk_c = TkfLPAREN - else - @lex_state = EXPR_BEG - tk_c = TkLPAREN - end - @indent_stack.push tk_c - Token(tk_c) - end - - @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do - |op, io| - @lex_state = EXPR_ARG - Token("[]") - end - - @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do - |op, io| - @lex_state = EXPR_ARG - Token("[]=") - end - - @OP.def_rule("[") do - |op, io| - @indent += 1 - if @lex_state == EXPR_FNAME - tk_c = TkfLBRACK - else - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID - tk_c = TkLBRACK - elsif @lex_state == EXPR_ARG && @space_seen - tk_c = TkLBRACK - else - tk_c = TkfLBRACK - end - @lex_state = EXPR_BEG - end - @indent_stack.push tk_c - Token(tk_c) - end - - @OP.def_rule("{") do - |op, io| - @indent += 1 - if @lex_state != EXPR_END && @lex_state != EXPR_ARG - tk_c = TkLBRACE - else - tk_c = TkfLBRACE - end - @lex_state = EXPR_BEG - @indent_stack.push tk_c - Token(tk_c) - end - - @OP.def_rule('\\') do - |op, io| - if getc == "\n" - @space_seen = true - @continue = true - Token(TkSPACE) - else - read_escape - Token("\\") - end - end - - @OP.def_rule('%') do - |op, io| - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID - identify_quotation - elsif peek(0) == '=' - getc - Token(TkOPASGN, :%) - elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ - identify_quotation - else - @lex_state = EXPR_BEG - Token("%") #)) - end - end - - @OP.def_rule('$') do - |op, io| - identify_gvar - end - - 
@OP.def_rule('@') do - |op, io| - if peek(0) =~ /[\w@]/ - ungetc - identify_identifier - else - Token("@") - end - end - - @OP.def_rule("") do - |op, io| - printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug? - if peek(0) =~ /[0-9]/ - t = identify_number - elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/ - t = identify_identifier - end - printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug? - t - end - - p @OP if RubyLex.debug? - end - - def identify_gvar - @lex_state = EXPR_END - - case ch = getc - when /[~_*$?!@\/\\;,=:<>".]/ #" - Token(TkGVAR, "$" + ch) - when "-" - Token(TkGVAR, "$-" + getc) - when "&", "`", "'", "+" - Token(TkBACK_REF, "$"+ch) - when /[1-9]/ - while getc =~ /[0-9]/; end - ungetc - Token(TkNTH_REF) - when /\w/ - ungetc - ungetc - identify_identifier - else - ungetc - Token("$") - end - end - - def identify_identifier - token = "" - if peek(0) =~ /[$@]/ - token.concat(c = getc) - if c == "@" and peek(0) == "@" - token.concat getc - end - end - - while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/ - print ":", ch, ":" if RubyLex.debug? - token.concat ch - end - ungetc - - if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "=" - token.concat getc - end - - # almost fix token - - case token - when /^\$/ - return Token(TkGVAR, token) - when /^\@\@/ - @lex_state = EXPR_END - # p Token(TkCVAR, token) - return Token(TkCVAR, token) - when /^\@/ - @lex_state = EXPR_END - return Token(TkIVAR, token) - end - - if @lex_state != EXPR_DOT - print token, "\n" if RubyLex.debug? - - token_c, *trans = TkReading2Token[token] - if token_c - # reserved word? - - if (@lex_state != EXPR_BEG && - @lex_state != EXPR_FNAME && - trans[1]) - # modifiers - token_c = TkSymbol2Token[trans[1]] - @lex_state = trans[0] - else - if @lex_state != EXPR_FNAME and peek(0) != ':' - if ENINDENT_CLAUSE.include?(token) - # check for ``class = val'' etc. 
- valid = true - case token - when "class" - valid = false unless peek_match?(/^\s*(<<|\w|::)/) - when "def" - valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/) - when "do" - valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/) - when *ENINDENT_CLAUSE - valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/) - else - # no nothing - end - if valid - if token == "do" - if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last) - @indent += 1 - @indent_stack.push token_c - end - else - @indent += 1 - @indent_stack.push token_c - end - end - - elsif DEINDENT_CLAUSE.include?(token) - @indent -= 1 - @indent_stack.pop - end - @lex_state = trans[0] - else - @lex_state = EXPR_END - end - end - return Token(token_c, token) - end - end - - if @lex_state == EXPR_FNAME - @lex_state = EXPR_END - if peek(0) == '=' - token.concat getc - end - elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT - @lex_state = EXPR_ARG - else - @lex_state = EXPR_END - end - - if token[0, 1] =~ /[A-Z]/ - return Token(TkCONSTANT, token) - elsif token[token.size - 1, 1] =~ /[!?]/ - return Token(TkFID, token) - else - return Token(TkIDENTIFIER, token) - end - end - - def identify_here_document - ch = getc - if ch == "-" || ch == "~" - ch = getc - indent = true - end - if /['"`]/ =~ ch - lt = ch - quoted = "" - while (c = getc) && c != lt - quoted.concat c - end - else - lt = '"' - quoted = ch.dup - while (c = getc) && c =~ /\w/ - quoted.concat c - end - ungetc - end - - ltback, @ltype = @ltype, lt - reserve = [] - while ch = getc - reserve.push ch - if ch == "\\" - reserve.push ch = getc - elsif ch == "\n" - break - end - end - - @here_header = false - - line = "" - while ch = getc - if ch == "\n" - if line == quoted - break - end - line = "" - else - line.concat ch unless indent && line == "" && /\s/ =~ ch - if @ltype != "'" && ch == "#" && peek(0) == "{" - identify_string_dvar - end - end - end - - @here_header = true - @here_readed.concat reserve - while 
ch = reserve.pop - ungetc ch - end - - @ltype = ltback - @lex_state = EXPR_END - Token(Ltype2Token[lt]) - end - - def identify_quotation - ch = getc - if lt = PERCENT_LTYPE[ch] - ch = getc - elsif ch =~ /\W/ - lt = "\"" - else - RubyLex.fail SyntaxError, "unknown type of %string" - end - @quoted = ch unless @quoted = PERCENT_PAREN[ch] - identify_string(lt, @quoted) - end - - def identify_number - @lex_state = EXPR_END - - if peek(0) == "0" && peek(1) !~ /[.eE]/ - getc - case peek(0) - when /[xX]/ - ch = getc - match = /[0-9a-fA-F_]/ - when /[bB]/ - ch = getc - match = /[01_]/ - when /[oO]/ - ch = getc - match = /[0-7_]/ - when /[dD]/ - ch = getc - match = /[0-9_]/ - when /[0-7]/ - match = /[0-7_]/ - when /[89]/ - RubyLex.fail SyntaxError, "Invalid octal digit" - else - return Token(TkINTEGER) - end - - len0 = true - non_digit = false - while ch = getc - if match =~ ch - if ch == "_" - if non_digit - RubyLex.fail SyntaxError, "trailing `#{ch}' in number" - else - non_digit = ch - end - else - non_digit = false - len0 = false - end - else - ungetc - if len0 - RubyLex.fail SyntaxError, "numeric literal without digits" - end - if non_digit - RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" - end - break - end - end - return Token(TkINTEGER) - end - - type = TkINTEGER - allow_point = true - allow_e = true - non_digit = false - while ch = getc - case ch - when /[0-9]/ - non_digit = false - when "_" - non_digit = ch - when allow_point && "." 
- if non_digit - RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" - end - type = TkFLOAT - if peek(0) !~ /[0-9]/ - type = TkINTEGER - ungetc - break - end - allow_point = false - when allow_e && "e", allow_e && "E" - if non_digit - RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" - end - type = TkFLOAT - if peek(0) =~ /[+-]/ - getc - end - allow_e = false - allow_point = false - non_digit = ch - else - if non_digit - RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" - end - ungetc - break - end - end - Token(type) - end - - def identify_string(ltype, quoted = ltype) - @ltype = ltype - @quoted = quoted - subtype = nil - begin - nest = 0 - while ch = getc - if @quoted == ch and nest == 0 - break - elsif @ltype != "'" && ch == "#" && peek(0) == "{" - identify_string_dvar - elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#" - subtype = true - elsif ch == '\\' and @ltype == "'" #' - case ch = getc - when "\\", "\n", "'" - else - ungetc - end - elsif ch == '\\' #' - read_escape - end - if PERCENT_PAREN.values.include?(@quoted) - if PERCENT_PAREN[ch] == @quoted - nest += 1 - elsif ch == @quoted - nest -= 1 - end - end - end - if @ltype == "/" - while /[imxoesun]/ =~ peek(0) - getc - end - end - if subtype - Token(DLtype2Token[ltype]) - else - Token(Ltype2Token[ltype]) - end - ensure - @ltype = nil - @quoted = nil - @lex_state = EXPR_END - end - end - - def identify_string_dvar - begin - getc - - reserve_continue = @continue - reserve_ltype = @ltype - reserve_indent = @indent - reserve_indent_stack = @indent_stack - reserve_state = @lex_state - reserve_quoted = @quoted - - @ltype = nil - @quoted = nil - @indent = 0 - @indent_stack = [] - @lex_state = EXPR_BEG - - loop do - @continue = false - prompt - tk = token - if @ltype or @continue or @indent >= 0 - next - end - break if tk.kind_of?(TkRBRACE) - end - ensure - @continue = reserve_continue - @ltype = reserve_ltype - @indent = reserve_indent - @indent_stack = 
reserve_indent_stack - @lex_state = reserve_state - @quoted = reserve_quoted - end - end - - def identify_comment - @ltype = "#" - - while ch = getc - if ch == "\n" - @ltype = nil - ungetc - break - end - end - return Token(TkCOMMENT) - end - - def read_escape - case ch = getc - when "\n", "\r", "\f" - when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #" - when /[0-7]/ - ungetc ch - 3.times do - case ch = getc - when /[0-7]/ - when nil - break - else - ungetc - break - end - end - - when "x" - 2.times do - case ch = getc - when /[0-9a-fA-F]/ - when nil - break - else - ungetc - break - end - end - - when "M" - if (ch = getc) != '-' - ungetc - else - if (ch = getc) == "\\" #" - read_escape - end - end - - when "C", "c" #, "^" - if ch == "C" and (ch = getc) != "-" - ungetc - elsif (ch = getc) == "\\" #" - read_escape - end - else - # other characters end end end diff --git a/test/irb/test_ruby-lex.rb b/test/irb/test_ruby-lex.rb deleted file mode 100644 index b07b4a2eb6..0000000000 --- a/test/irb/test_ruby-lex.rb +++ /dev/null @@ -1,108 +0,0 @@ -# frozen_string_literal: false -require 'test/unit' -require 'irb/ruby-lex' -require 'stringio' - -module TestIRB - class TestRubyLex < Test::Unit::TestCase - def setup - @scanner = RubyLex.new - end - - def teardown - RubyLex.debug_level = 0 - end - - def test_set_input_proc - called = false - @scanner.set_input(nil) {called = true; nil} - @scanner.each_top_level_statement {} - assert(called) - end - - def test_comment - assert_equal([["#\n", 1]], top_level_statement("#\n")) - end - - def test_top_level_statement - result = top_level_statement("#{<<-"begin;"}#{<<~"end;"}") - begin; - begin - end - begin - end - end; - assert_equal([ - ["begin\n""end\n", 1], - ["begin\n""end\n", 3], - ], - result) - end - - def test_immature_statement - src = "if false\n" - assert_equal([[src, 1]], top_level_statement(src)) - end - - def test_prompt - prompts = [] - @scanner.set_prompt {|*a| - a << 
@scanner.instance_variable_get(:@lex_state) - unless prompts.last == a - prompts << a - end - } - src, lineno = "#{<<-"begin;"}#{<<~'end;'}", __LINE__+1 - begin; - # #;# LTYPE:INDENT:CONTINUE - x #;# -:0:- - x( #;# -:0:- - ) #;# -:1:* - a \ #;# -:0:- - #;# -:0:* - a; #;# -:0:- - a #;# -:0:- - #;# -:0:- - a #;# -:0:- - a = #;# -:0:- - ' #;# -:0:* - ' #;# ':0:* - if false or #;# -:0:- - true #;# -:1:* - a #;# -:1:- - " #;# -:1:- - " #;# ":1:- - begin #;# -:1:- - a #;# -:2:- - a #;# -:2:- - end #;# -:2:- - else #;# -:1:- - nil #;# -:1:- - end #;# -:1:- - end; - top_level_statement(src.gsub(/[ \t]*#;#.*/, '')) - src.each_line.with_index(1) do |line, i| - p = prompts.shift - next unless /#;#\s*(?:-|(?<ltype>\S)):(?<indent>\d+):(?:(?<cont>\*)|-)(?:.*FIXME:(?<fixme>.*))?/ =~ line - indent = indent.to_i - cont = (fixme && /`continue'/.match?(fixme)) ^ cont - assert_equal([ltype, indent, cont, i], p[0..3], "#{lineno+i}:#{p[4]}: #{line}") - end - end - - def top_level_statement(lines) - input = InputLines.new(lines, "r") - scanned = [] - @scanner.set_input(input) - @scanner.each_top_level_statement {|*e| - scanned << e - yield(*e) if defined?(yield) - } - scanned - end - - class InputLines < StringIO - alias encoding external_encoding - end - end -end