From 94b740b2499242e1aca67f7bbf595e75e63abc40 Mon Sep 17 00:00:00 2001
From: aycabta <aycabta@gmail.com>
Date: Sun, 28 Apr 2019 03:41:06 +0900
Subject: [PATCH] Use Ripper for IRB

The debug option of IRB is deleted because it's just for IRB's pure Ruby
parser.
---
 doc/irb/irb.rd.ja          |    6 -
 lib/irb.rb                 |    3 -
 lib/irb/context.rb         |   20 -
 lib/irb/init.rb            |    4 -
 lib/irb/lc/help-message    |    1 -
 lib/irb/lc/ja/help-message |    2 -
 lib/irb/ruby-lex.rb        | 1283 ++++++------------------------------
 test/irb/test_ruby-lex.rb  |  108 ---
 8 files changed, 204 insertions(+), 1223 deletions(-)
 delete mode 100644 test/irb/test_ruby-lex.rb
diff --git a/doc/irb/irb.rd.ja b/doc/irb/irb.rd.ja
index 85b6536ee4..0522b3fa3d 100644
--- a/doc/irb/irb.rd.ja
+++ b/doc/irb/irb.rd.ja
@@ -70,8 +70,6 @@ irbの使い方は, Rubyさえ知っていればいたって簡単です. 基本
   --back-trace-limit n
 		    バックトレース表示をバックトレースの頭から n, 後ろ
 		    からnだけ行なう. デフォルトは16
-  --irb_debug n	    irbのデバッグデバッグレベルをnに設定する(利用しな
-		    い方が無難でしょう).
   -v, --version	    irbのバージョンを表示する
 
 = コンフィギュレーション
@@ -97,7 +95,6 @@ irb起動時に``~/.irbrc''を読み込みます. もし存在しない場合は
   IRB.conf[:IGNORE_EOF] = false
   IRB.conf[:PROMPT_MODE] = :DEFAULT
   IRB.conf[:PROMPT] = {...}
-  IRB.conf[:DEBUG_LEVEL]=0
   IRB.conf[:VERBOSE]=true
 
 == プロンプトの設定
@@ -183,9 +180,6 @@ irb拡張コマンドは, 簡単な名前と頭に`irb_'をつけた名前と両
     バックトレース表示をバックトレースの頭からn, 後ろからnだけ行なう.
     デフォルトは16
 
---- conf.debug_level = N
-    irb用のデバッグレベルの設定
-
 --- conf.ignore_eof = true/false
     ^Dが入力された時の動作を設定する. trueの時は^Dを無視する, falseの
     時はirbを終了する.
diff --git a/lib/irb.rb b/lib/irb.rb
index ba12bdbcab..d0246b077c 100644
--- a/lib/irb.rb
+++ b/lib/irb.rb
@@ -74,7 +74,6 @@ require "irb/version"
 #     --back-trace-limit n
 #                       Display backtrace top n and tail n. The default
 #                       value is 16.
-#     --irb_debug n     Set internal debug level to n (not for popular use)
 #     -v, --version     Print the version of irb
 #
 # == Configuration
@@ -102,7 +101,6 @@ require "irb/version"
 #     IRB.conf[:IGNORE_EOF] = false
 #     IRB.conf[:PROMPT_MODE] = :DEFAULT
 #     IRB.conf[:PROMPT] = {...}
-#     IRB.conf[:DEBUG_LEVEL]=0
 #
 # === Auto indentation
 #
@@ -413,7 +411,6 @@ module IRB
       @signal_status = :IN_IRB
 
       @scanner = RubyLex.new
-      @scanner.exception_on_syntax_error = false
     end
 
     def run(conf = IRB.conf)
diff --git a/lib/irb/context.rb b/lib/irb/context.rb
index e8e6a118e6..f8a6009d17 100644
--- a/lib/irb/context.rb
+++ b/lib/irb/context.rb
@@ -101,7 +101,6 @@ module IRB
       if @echo.nil?
         @echo = true
       end
-      self.debug_level = IRB.conf[:DEBUG_LEVEL]
     end
 
     # The top-level workspace, see WorkSpace#main
@@ -211,10 +210,6 @@ module IRB
     #
     # A copy of the default <code>IRB.conf[:VERBOSE]</code>
     attr_accessor :verbose
-    # The debug level of irb
-    #
-    # See #debug_level= for more information.
-    attr_reader :debug_level
 
     # The limit of backtrace lines displayed as top +n+ and tail +n+.
     #
@@ -361,21 +356,6 @@ module IRB
       print "Do nothing."
     end
 
-    # Sets the debug level of irb
-    #
-    # Can also be set using the +--irb_debug+ command line option.
-    #
-    # See IRB@Command+line+options for more command line options.
-    def debug_level=(value)
-      @debug_level = value
-      RubyLex.debug_level = value
-    end
-
-    # Whether or not debug mode is enabled, see #debug_level=.
-    def debug?
-      @debug_level > 0
-    end
-
     def evaluate(line, line_no, exception: nil) # :nodoc:
       @line_no = line_no
       if exception
diff --git a/lib/irb/init.rb b/lib/irb/init.rb
index 2066d8cb64..344b243f12 100644
--- a/lib/irb/init.rb
+++ b/lib/irb/init.rb
@@ -112,8 +112,6 @@ module IRB # :nodoc:
     @CONF[:LC_MESSAGES] = Locale.new
 
     @CONF[:AT_EXIT] = []
-
-    @CONF[:DEBUG_LEVEL] = 0
   end
 
   def IRB.init_error
@@ -191,8 +189,6 @@ module IRB # :nodoc:
         @CONF[:CONTEXT_MODE] = ($1 || argv.shift).to_i
       when "--single-irb"
         @CONF[:SINGLE_IRB] = true
-      when /^--irb_debug(?:=(.+))?/
-        @CONF[:DEBUG_LEVEL] = ($1 || argv.shift).to_i
       when "-v", "--version"
         print IRB.version, "\n"
         exit 0
diff --git a/lib/irb/lc/help-message b/lib/irb/lc/help-message
index d43c6a1695..d1a66dddda 100644
--- a/lib/irb/lc/help-message
+++ b/lib/irb/lc/help-message
@@ -39,7 +39,6 @@ Usage:  irb.rb [options] [programfile] [arguments]
   --back-trace-limit n
 		    Display backtrace top n and tail n. The default
 		    value is 16.
-  --irb_debug n	    Set internal debug level to n (not for popular use)
   --verbose         Show details
   --noverbose       Don't show details
   -v, --version	    Print the version of irb
diff --git a/lib/irb/lc/ja/help-message b/lib/irb/lc/ja/help-message
index 1b24d14d28..7a15f973c6 100644
--- a/lib/irb/lc/ja/help-message
+++ b/lib/irb/lc/ja/help-message
@@ -41,8 +41,6 @@ Usage:  irb.rb [options] [programfile] [arguments]
 		    バックトレース表示をバックトレースの頭から n, 後ろ
 		    からnだけ行なう. デフォルトは16
 
-  --irb_debug n	    irbのデバッグレベルをnに設定する(非推奨).
-
   --verbose	    詳細なメッセージを出力する.
   --noverbose	    詳細なメッセージを出力しない(デフォルト).
   -v, --version	    irbのバージョンを表示する.
diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb
index 555d1f024f..c4bec4a854 100644
--- a/lib/irb/ruby-lex.rb
+++ b/lib/irb/ruby-lex.rb
@@ -11,73 +11,39 @@
 #
 
 require "e2mmap"
-require_relative "slex"
-require_relative "ruby-token"
+require "ripper"
 
 # :stopdoc:
 class RubyLex
 
   extend Exception2MessageMapper
-  def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
-  def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
-  def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
-  def_exception(:TkReading2TokenDuplicateError,
-                "key duplicate(token_n='%s', key='%s')")
-  def_exception(:SyntaxError, "%s")
-
   def_exception(:TerminateLineInput, "Terminate Line Input")
 
-  include RubyToken
-
-  class << self
-    attr_accessor :debug_level
-    def debug?
-      @debug_level > 0
-    end
-  end
-  @debug_level = 0
-
   def initialize
-    lex_init
-    set_input(STDIN)
-
-    @seek = 0
     @exp_line_no = @line_no = 1
-    @base_char_no = 0
-    @char_no = 0
-    @rests = []
-    @readed = []
-    @here_readed = []
-
     @indent = 0
-    @indent_stack = []
-    @lex_state = EXPR_BEG
-    @space_seen = false
-    @here_header = false
-    @post_symbeg = false
-
     @continue = false
     @line = ""
-
-    @skip_space = false
-    @readed_auto_clean_up = false
-    @exception_on_syntax_error = true
-
     @prompt = nil
   end
 
-  attr_accessor :skip_space
-  attr_accessor :readed_auto_clean_up
-  attr_accessor :exception_on_syntax_error
-
-  attr_reader :seek
-  attr_reader :char_no
-  attr_reader :line_no
-  attr_reader :indent
-
   # io functions
   def set_input(io, p = nil, &block)
     @io = io
+    if @io.respond_to?(:check_termination)
+      @io.check_termination do |code|
+        @tokens = Ripper.lex(code)
+        continue = process_continue
+        code_block_open = check_code_block(code)
+        indent = process_nesting_level
+        ltype = process_literal_type
+        if code_block_open or ltype or continue or indent > 0
+          false
+        else
+          true
+        end
+      end
+    end
     if p.respond_to?(:call)
       @input = p
     elsif block_given?
@@ -87,112 +53,6 @@ class RubyLex
     end
   end
 
-  def get_readed
-    if idx = @readed.rindex("\n")
-      @base_char_no = @readed.size - (idx + 1)
-    else
-      @base_char_no += @readed.size
-    end
-
-    readed = @readed.join("")
-    @readed = []
-    readed
-  end
-
-  def getc
-    while @rests.empty?
-      @rests.push nil unless buf_input
-    end
-    c = @rests.shift
-    if @here_header
-      @here_readed.push c
-    else
-      @readed.push c
-    end
-    @seek += 1
-    if c == "\n"
-      @line_no += 1
-      @char_no = 0
-    else
-      @char_no += 1
-    end
-    c
-  end
-
-  def gets
-    l = ""
-    while c = getc
-      l.concat(c)
-      break if c == "\n"
-    end
-    return nil if l == "" and c.nil?
-    l
-  end
-
-  def eof?
-    @io.eof?
-  end
-
-  def getc_of_rests
-    if @rests.empty?
-      nil
-    else
-      getc
-    end
-  end
-
-  def ungetc(c = nil)
-    if @here_readed.empty?
-      c2 = @readed.pop
-    else
-      c2 = @here_readed.pop
-    end
-    c = c2 unless c
-    @rests.unshift c #c =
-    @seek -= 1
-    if c == "\n"
-      @line_no -= 1
-      if idx = @readed.rindex("\n")
-        @char_no = idx + 1
-      else
-        @char_no = @base_char_no + @readed.size
-      end
-    else
-      @char_no -= 1
-    end
-  end
-
-  def peek_equal?(str)
-    chrs = str.split(//)
-    until @rests.size >= chrs.size
-      return false unless buf_input
-    end
-    @rests[0, chrs.size] == chrs
-  end
-
-  def peek_match?(regexp)
-    while @rests.empty?
-      return false unless buf_input
-    end
-    regexp =~ @rests.join("")
-  end
-
-  def peek(i = 0)
-    while @rests.size <= i
-      return nil unless buf_input
-    end
-    @rests[i]
-  end
-
-  def buf_input
-    prompt
-    line = @input.call
-    return nil unless line
-    @rests.concat line.chars.to_a
-    true
-  end
-  private :buf_input
-
   def set_prompt(p = nil, &block)
     p = block if block_given?
     if p.respond_to?(:call)
@@ -210,20 +70,11 @@ class RubyLex
 
   def initialize_input
     @ltype = nil
-    @quoted = nil
     @indent = 0
-    @indent_stack = []
-    @lex_state = EXPR_BEG
-    @space_seen = false
-    @here_header = false
-
     @continue = false
-    @post_symbeg = false
-
-    prompt
-
     @line = ""
     @exp_line_no = @line_no
+    @code_block_open = false
   end
 
   def each_top_level_statement
@@ -231,13 +82,14 @@ class RubyLex
     catch(:TERM_INPUT) do
       loop do
         begin
-          @continue = false
           prompt
           unless l = lex
             throw :TERM_INPUT if @line == ''
           else
+            @line_no += 1
+            next if l == "\n"
             @line.concat l
-            if @ltype or @continue or @indent > 0
+            if @code_block_open or @ltype or @continue or @indent > 0
               next
             end
           end
@@ -250,930 +102,203 @@ class RubyLex
           @exp_line_no = @line_no
 
           @indent = 0
-          @indent_stack = []
-          prompt
         rescue TerminateLineInput
           initialize_input
           prompt
-          get_readed
         end
       end
     end
   end
 
   def lex
-    continue = @continue
-    while tk = token
-      case tk
-      when TkNL, TkEND_OF_SCRIPT
-        @continue = continue unless continue.nil?
-        break unless @continue
-      when TkSPACE, TkCOMMENT
-      when TkSEMICOLON, TkBEGIN, TkELSE
-        @continue = continue = false
-      else
-        continue = nil
+    line = @input.call # next chunk of input; a nil result is tolerated below
+    if @io.respond_to?(:check_termination)
+      return line # multiline: completeness is judged by the block set_input registers via @io.check_termination
+    end
+    code = @line + (line.nil? ? '' : line) # text accumulated in @line plus the new line
+    code.gsub!(/\n*$/, '').concat("\n") # drop trailing newlines, then append one
+    @tokens = Ripper.lex(code) # the helpers called below all read @tokens
+    @continue = process_continue
+    @code_block_open = check_code_block(code)
+    @indent = process_nesting_level
+    @ltype = process_literal_type
+    line # return the raw input line, not the accumulated code
+  end
+
+  def process_continue # computes the value that lex stores in @continue
+    continued_bits = Ripper::EXPR_BEG | Ripper::EXPR_FNAME | Ripper::EXPR_DOT
+    # assumes the last token is the trailing newline, so @tokens[-2] is the last real token
+    if @tokens.size >= 2 and @tokens[-2][1] == :on_regexp_end
+      # end of regexp literal
+      return false
+    elsif @tokens.size >= 2 and @tokens[-2][1] == :on_semicolon
+      return false
+    elsif @tokens.size >= 2 and @tokens[-2][1] == :on_kw and (@tokens[-2][2] == 'begin' or @tokens[-2][2] == 'else')
+      return false
+    elsif !@tokens.empty? and @tokens.last[2] == "\\\n" # final token is a backslash-newline
+      return true
+    elsif @tokens.size >= 2 and @tokens[-2][3].anybits?(continued_bits)
+      # lexer state after the last real token has one of continued_bits set
+      return true
+    end
+    false
+  end
+
+  def check_code_block(code) # true when code looks unfinished; lex stores the result in @code_block_open
+    return true if @tokens.empty? # nothing lexed: treated as still open
+    if @tokens.last[1] == :on_heredoc_beg
+      return true
+    end
+
+    begin # compile the code to see whether the parser raises a SyntaxError
+      RubyVM::InstructionSequence.compile(code)
+    rescue SyntaxError => e
+      case e.message
+      when /unterminated (?:string|regexp) meets end of file/
+        # "unterminated regexp meets end of file"
+        #
+        #   example:
+        #     /
+        #
+        # "unterminated string meets end of file"
+        #
+        #   example:
+        #     '
+        return true
+      when /syntax error, unexpected end-of-input/
+        # "syntax error, unexpected end-of-input, expecting keyword_end"
+        #
+        #   example:
+        #     if true
+        #       hoge
+        #       if false
+        #         fuga
+        #       end
+        return true
+      when /syntax error, unexpected keyword_end/
+        # "syntax error, unexpected keyword_end"
+        #
+        #   example:
+        #     if (
+        #     end
+        #
+        #   example:
+        #     end
+        return false
+      when /unexpected tREGEXP_BEG/
+        # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
+        #
+        #   example:
+        #     method / f /
+        return false
       end
     end
-    line = get_readed
-    if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
+
+    last_lex_state = @tokens.last[3] # no decisive parser message: fall back to the last token's lexer state
+    if last_lex_state.allbits?(Ripper::EXPR_BEG)
+      return false
+    elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
+      return true
+    elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
+      return true
+    elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
+      return true
+    elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
+      return true
+    elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
+      return false
+    end
+
+    false
+  end
+
+  def process_nesting_level # net count of unclosed brackets/keyword blocks in @tokens; lex stores it in @indent
+    @tokens.inject(0) { |indent, t|
+      case t[1] # t[1] is the token type, t[2] its text, t[3] the lexer state
+      when :on_lbracket, :on_lbrace, :on_lparen
+        indent += 1
+      when :on_rbracket, :on_rbrace, :on_rparen
+        indent -= 1
+      when :on_kw
+        case t[2]
+        when 'def', 'do', 'case', 'for', 'begin', 'class', 'module' # NOTE(review): the optional 'do' of while/until/for is counted too - confirm
+          indent += 1
+        when 'if', 'unless', 'while', 'until'
+          # postfix forms carry Ripper::EXPR_LABEL and open no block; 'rescue' is not counted because it never has its own 'end'
+          indent += 1 unless t[3].allbits?(Ripper::EXPR_LABEL)
+        when 'end'
+          indent -= 1
+        end
+      end
+      # percent literals are not indented
+      indent
+    }
+  end
+
+  def check_string_literal # returns the start token of the most recently opened literal still unclosed in @tokens, or '' if none
+    i = 0
+    start_token = [] # stack of start tokens of the literals currently open
+    end_type = [] # parallel stack: token type that closes each open literal
+    while i < @tokens.size
+      t = @tokens[i]
+      case t[1]
+      when :on_tstring_beg
+        start_token << t
+        end_type << :on_tstring_end
+      when :on_regexp_beg
+        start_token << t
+        end_type << :on_regexp_end
+      when :on_symbeg
+        if (i + 1) < @tokens.size and !%i[on_ident on_const on_op on_cvar on_ivar on_gvar on_kw].include?(@tokens[i + 1][1])
+          start_token << t # single-token symbols (:foo, :Foo, :+, :@x, :if) have no closing token, so only other forms are pushed
+          end_type << :on_tstring_end
+        end
+      when :on_backtick
+        start_token << t
+        end_type << :on_tstring_end
+      when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
+        start_token << t
+        end_type << :on_tstring_end
+      when :on_heredoc_beg
+        start_token << t
+        end_type << :on_heredoc_end
+      when end_type.last # closing token of the most recently opened literal
+        start_token.pop
+        end_type.pop
+      end
+      i += 1
+    end
+    start_token.last.nil? ? '' : start_token.last
+  end
+
+  def process_literal_type
+    start_token = check_string_literal
+    case start_token[1]
+    when :on_tstring_beg
+      case start_token[2]
+      when ?"      then ?"
+      when /^%.$/  then ?"
+      when /^%Q.$/ then ?"
+      when ?'      then ?'
+      when /^%q.$/ then ?'
+      end
+    when :on_regexp_beg   then ?/
+    when :on_symbeg       then ?:
+    when :on_backtick     then ?`
+    when :on_qwords_beg   then ?]
+    when :on_words_beg    then ?]
+    when :on_qsymbols_beg then ?]
+    when :on_symbols_beg  then ?]
+    when :on_heredoc_beg
+      start_token[2] =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/
+      case $1
+      when ?" then ?"
+      when ?' then ?'
+      when ?` then ?`
+      else         ?"
+      end
+    else
       nil
-    else
-      line
-    end
-  end
-
-  def token
-    @prev_seek = @seek
-    @prev_line_no = @line_no
-    @prev_char_no = @char_no
-    begin
-      begin
-        tk = @OP.match(self)
-        @space_seen = tk.kind_of?(TkSPACE)
-        @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp)
-        @post_symbeg = tk.kind_of?(TkSYMBEG)
-      rescue SyntaxError
-        raise if @exception_on_syntax_error
-        tk = TkError.new(@seek, @line_no, @char_no)
-      end
-    end while @skip_space and tk.kind_of?(TkSPACE)
-    if @readed_auto_clean_up
-      get_readed
-    end
-    tk
-  end
-
-  ENINDENT_CLAUSE = [
-    "case", "class", "def", "do", "for", "if",
-    "module", "unless", "until", "while", "begin"
-  ]
-  DEINDENT_CLAUSE = ["end"
-  ]
-
-  PERCENT_LTYPE = {
-    "q" => "\'",
-    "Q" => "\"",
-    "x" => "\`",
-    "r" => "/",
-    "w" => "]",
-    "W" => "]",
-    "i" => "]",
-    "I" => "]",
-    "s" => ":"
-  }
-
-  PERCENT_PAREN = {
-    "{" => "}",
-    "[" => "]",
-    "<" => ">",
-    "(" => ")"
-  }
-
-  Ltype2Token = {
-    "\'" => TkSTRING,
-    "\"" => TkSTRING,
-    "\`" => TkXSTRING,
-    "/" => TkREGEXP,
-    "]" => TkDSTRING,
-    ":" => TkSYMBOL
-  }
-  DLtype2Token = {
-    "\"" => TkDSTRING,
-    "\`" => TkDXSTRING,
-    "/" => TkDREGEXP,
-  }
-
-  def lex_init()
-    @OP = IRB::SLex.new
-    @OP.def_rules("\0", "\004", "\032") do |op, io|
-      Token(TkEND_OF_SCRIPT)
-    end
-
-    @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
-      @space_seen = true
-      while getc =~ /[ \t\f\r\13]/; end
-      ungetc
-      Token(TkSPACE)
-    end
-
-    @OP.def_rule("#") do |op, io|
-      identify_comment
-    end
-
-    @OP.def_rule("=begin",
-                 proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
-      |op, io|
-      @ltype = "="
-      until getc == "\n"; end
-      until peek_equal?("=end") && peek(4) =~ /\s/
-        until getc == "\n"; end
-      end
-      gets
-      @ltype = nil
-      Token(TkRD_COMMENT)
-    end
-
-    @OP.def_rule("\n") do |op, io|
-      print "\\n\n" if RubyLex.debug?
-      case @lex_state
-      when EXPR_BEG, EXPR_FNAME, EXPR_DOT
-        @continue = true
-      else
-        @continue = false
-        @lex_state = EXPR_BEG
-        until (@indent_stack.empty? ||
-            [TkLPAREN, TkLBRACK, TkLBRACE,
-             TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
-          @indent_stack.pop
-        end
-      end
-      @here_header = false
-      @here_readed = []
-      Token(TkNL)
-    end
-
-    @OP.def_rules("*", "**",
-                  "=", "==", "===",
-                  "=~", "<=>",
-                  "<", "<=",
-                  ">", ">=", ">>",
-                  "!", "!=", "!~") do
-      |op, io|
-      case @lex_state
-      when EXPR_FNAME, EXPR_DOT
-        @lex_state = EXPR_ARG
-      else
-        @lex_state = EXPR_BEG
-      end
-      Token(op)
-    end
-
-    @OP.def_rules("<<") do
-      |op, io|
-      tk = nil
-      if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
-          (@lex_state != EXPR_ARG || @space_seen)
-        c = peek(0)
-        if /[-~"'`\w]/ =~ c
-          tk = identify_here_document
-        end
-      end
-      unless tk
-        tk = Token(op)
-        case @lex_state
-        when EXPR_FNAME, EXPR_DOT
-          @lex_state = EXPR_ARG
-        else
-          @lex_state = EXPR_BEG
-        end
-      end
-      tk
-    end
-
-    @OP.def_rules("'", '"') do
-      |op, io|
-      identify_string(op)
-    end
-
-    @OP.def_rules("`") do
-      |op, io|
-      if @lex_state == EXPR_FNAME
-        @lex_state = EXPR_END
-        Token(op)
-      else
-        identify_string(op)
-      end
-    end
-
-    @OP.def_rules('?') do
-      |op, io|
-      if @lex_state == EXPR_END
-        @lex_state = EXPR_BEG
-        Token(TkQUESTION)
-      else
-        ch = getc
-        if @lex_state == EXPR_ARG && ch =~ /\s/
-          ungetc
-          @lex_state = EXPR_BEG;
-          Token(TkQUESTION)
-        else
-          if (ch == '\\')
-            read_escape
-          end
-          @lex_state = EXPR_END
-          Token(TkINTEGER)
-        end
-      end
-    end
-
-    @OP.def_rules("&", "&&", "|", "||") do
-      |op, io|
-      @lex_state = EXPR_BEG
-      Token(op)
-    end
-
-    @OP.def_rules("+=", "-=", "*=", "**=",
-                  "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
-      |op, io|
-      @lex_state = EXPR_BEG
-      op =~ /^(.*)=$/
-      Token(TkOPASGN, $1)
-    end
-
-    @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do
-      |op, io|
-      @lex_state = EXPR_ARG
-      Token(op)
-    end
-
-    @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do
-      |op, io|
-      @lex_state = EXPR_ARG
-      Token(op)
-    end
-
-    @OP.def_rules("+", "-") do
-      |op, io|
-      catch(:RET) do
-        if @lex_state == EXPR_ARG
-          if @space_seen and peek(0) =~ /[0-9]/
-            throw :RET, identify_number
-          else
-            @lex_state = EXPR_BEG
-          end
-        elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
-          throw :RET, identify_number
-        else
-          @lex_state = EXPR_BEG
-        end
-        Token(op)
-      end
-    end
-
-    @OP.def_rule(".") do
-      |op, io|
-      @lex_state = EXPR_BEG
-      if peek(0) =~ /[0-9]/
-        ungetc
-        identify_number
-      else
-        # for "obj.if" etc.
-        @lex_state = EXPR_DOT
-        Token(TkDOT)
-      end
-    end
-
-    @OP.def_rules("..", "...") do
-      |op, io|
-      @lex_state = EXPR_BEG
-      Token(op)
-    end
-
-    lex_int2
-  end
-
-  def lex_int2
-    @OP.def_rules("]", "}", ")") do
-      |op, io|
-      @lex_state = EXPR_END
-      @indent -= 1
-      @indent_stack.pop
-      Token(op)
-    end
-
-    @OP.def_rule(":") do
-      |op, io|
-      if @lex_state == EXPR_END || peek(0) =~ /\s/
-        @lex_state = EXPR_BEG
-        Token(TkCOLON)
-      else
-        @lex_state = EXPR_FNAME
-        Token(TkSYMBEG)
-      end
-    end
-
-    @OP.def_rule("::") do
-       |op, io|
-      if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
-        @lex_state = EXPR_BEG
-        Token(TkCOLON3)
-      else
-        @lex_state = EXPR_DOT
-        Token(TkCOLON2)
-      end
-    end
-
-    @OP.def_rule("/") do
-      |op, io|
-      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
-        identify_string(op)
-      elsif peek(0) == '='
-        getc
-        @lex_state = EXPR_BEG
-        Token(TkOPASGN, "/") #/)
-      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
-        identify_string(op)
-      else
-        @lex_state = EXPR_BEG
-        Token("/") #/)
-      end
-    end
-
-    @OP.def_rules("^") do
-      |op, io|
-      @lex_state = EXPR_BEG
-      Token("^")
-    end
-
-    @OP.def_rules(",") do
-      |op, io|
-      @lex_state = EXPR_BEG
-      Token(op)
-    end
-
-    @OP.def_rules(";") do
-      |op, io|
-      @lex_state = EXPR_BEG
-      until (@indent_stack.empty? ||
-          [TkLPAREN, TkLBRACK, TkLBRACE,
-           TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
-        @indent_stack.pop
-      end
-      Token(op)
-    end
-
-    @OP.def_rule("~") do
-      |op, io|
-      @lex_state = EXPR_BEG
-      Token("~")
-    end
-
-    @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do
-      |op, io|
-      @lex_state = EXPR_BEG
-      Token("~")
-    end
-
-    @OP.def_rule("(") do
-      |op, io|
-      @indent += 1
-      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
-        @lex_state = EXPR_BEG
-        tk_c = TkfLPAREN
-      else
-        @lex_state = EXPR_BEG
-        tk_c = TkLPAREN
-      end
-      @indent_stack.push tk_c
-      Token(tk_c)
-    end
-
-    @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do
-      |op, io|
-      @lex_state = EXPR_ARG
-      Token("[]")
-    end
-
-    @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do
-      |op, io|
-      @lex_state = EXPR_ARG
-      Token("[]=")
-    end
-
-    @OP.def_rule("[") do
-      |op, io|
-      @indent += 1
-      if @lex_state == EXPR_FNAME
-        tk_c = TkfLBRACK
-      else
-        if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
-          tk_c = TkLBRACK
-        elsif @lex_state == EXPR_ARG && @space_seen
-          tk_c = TkLBRACK
-        else
-          tk_c = TkfLBRACK
-        end
-        @lex_state = EXPR_BEG
-      end
-      @indent_stack.push tk_c
-      Token(tk_c)
-    end
-
-    @OP.def_rule("{") do
-      |op, io|
-      @indent += 1
-      if @lex_state != EXPR_END && @lex_state != EXPR_ARG
-        tk_c = TkLBRACE
-      else
-        tk_c = TkfLBRACE
-      end
-      @lex_state = EXPR_BEG
-      @indent_stack.push tk_c
-      Token(tk_c)
-    end
-
-    @OP.def_rule('\\') do
-      |op, io|
-      if getc == "\n"
-        @space_seen = true
-        @continue = true
-        Token(TkSPACE)
-      else
-        read_escape
-        Token("\\")
-      end
-    end
-
-    @OP.def_rule('%') do
-      |op, io|
-      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
-        identify_quotation
-      elsif peek(0) == '='
-        getc
-        Token(TkOPASGN, :%)
-      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
-        identify_quotation
-      else
-        @lex_state = EXPR_BEG
-        Token("%") #))
-      end
-    end
-
-    @OP.def_rule('$') do
-      |op, io|
-      identify_gvar
-    end
-
-    @OP.def_rule('@') do
-      |op, io|
-      if peek(0) =~ /[\w@]/
-        ungetc
-        identify_identifier
-      else
-        Token("@")
-      end
-    end
-
-    @OP.def_rule("") do
-      |op, io|
-      printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
-      if peek(0) =~ /[0-9]/
-        t = identify_number
-      elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
-        t = identify_identifier
-      end
-      printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
-      t
-    end
-
-    p @OP if RubyLex.debug?
-  end
-
-  def identify_gvar
-    @lex_state = EXPR_END
-
-    case ch = getc
-    when /[~_*$?!@\/\\;,=:<>".]/   #"
-      Token(TkGVAR, "$" + ch)
-    when "-"
-      Token(TkGVAR, "$-" + getc)
-    when "&", "`", "'", "+"
-      Token(TkBACK_REF, "$"+ch)
-    when /[1-9]/
-      while getc =~ /[0-9]/; end
-      ungetc
-      Token(TkNTH_REF)
-    when /\w/
-      ungetc
-      ungetc
-      identify_identifier
-    else
-      ungetc
-      Token("$")
-    end
-  end
-
-  def identify_identifier
-    token = ""
-    if peek(0) =~ /[$@]/
-      token.concat(c = getc)
-      if c == "@" and peek(0) == "@"
-        token.concat getc
-      end
-    end
-
-    while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
-      print ":", ch, ":" if RubyLex.debug?
-      token.concat ch
-    end
-    ungetc
-
-    if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
-      token.concat getc
-    end
-
-    # almost fix token
-
-    case token
-    when /^\$/
-      return Token(TkGVAR, token)
-    when /^\@\@/
-      @lex_state = EXPR_END
-      # p Token(TkCVAR, token)
-      return Token(TkCVAR, token)
-    when /^\@/
-      @lex_state = EXPR_END
-      return Token(TkIVAR, token)
-    end
-
-    if @lex_state != EXPR_DOT
-      print token, "\n" if RubyLex.debug?
-
-      token_c, *trans = TkReading2Token[token]
-      if token_c
-        # reserved word?
-
-        if (@lex_state != EXPR_BEG &&
-            @lex_state != EXPR_FNAME &&
-            trans[1])
-          # modifiers
-          token_c = TkSymbol2Token[trans[1]]
-          @lex_state = trans[0]
-        else
-          if @lex_state != EXPR_FNAME and peek(0) != ':'
-            if ENINDENT_CLAUSE.include?(token)
-              # check for ``class = val'' etc.
-              valid = true
-              case token
-              when "class"
-                valid = false unless peek_match?(/^\s*(<<|\w|::)/)
-              when "def"
-                valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
-              when "do"
-                valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
-              when *ENINDENT_CLAUSE
-                valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
-              else
-                # no nothing
-              end
-              if valid
-                if token == "do"
-                  if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
-                    @indent += 1
-                    @indent_stack.push token_c
-                  end
-                else
-                  @indent += 1
-                  @indent_stack.push token_c
-                end
-              end
-
-            elsif DEINDENT_CLAUSE.include?(token)
-              @indent -= 1
-              @indent_stack.pop
-            end
-            @lex_state = trans[0]
-          else
-            @lex_state = EXPR_END
-          end
-        end
-        return Token(token_c, token)
-      end
-    end
-
-    if @lex_state == EXPR_FNAME
-      @lex_state = EXPR_END
-      if peek(0) == '='
-        token.concat getc
-      end
-    elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
-      @lex_state = EXPR_ARG
-    else
-      @lex_state = EXPR_END
-    end
-
-    if token[0, 1] =~ /[A-Z]/
-      return Token(TkCONSTANT, token)
-    elsif token[token.size - 1, 1] =~ /[!?]/
-      return Token(TkFID, token)
-    else
-      return Token(TkIDENTIFIER, token)
-    end
-  end
-
-  def identify_here_document
-    ch = getc
-    if ch == "-" || ch == "~"
-      ch = getc
-      indent = true
-    end
-    if /['"`]/ =~ ch
-      lt = ch
-      quoted = ""
-      while (c = getc) && c != lt
-        quoted.concat c
-      end
-    else
-      lt = '"'
-      quoted = ch.dup
-      while (c = getc) && c =~ /\w/
-        quoted.concat c
-      end
-      ungetc
-    end
-
-    ltback, @ltype = @ltype, lt
-    reserve = []
-    while ch = getc
-      reserve.push ch
-      if ch == "\\"
-        reserve.push ch = getc
-      elsif ch == "\n"
-        break
-      end
-    end
-
-    @here_header = false
-
-    line = ""
-    while ch = getc
-      if ch == "\n"
-        if line == quoted
-          break
-        end
-        line = ""
-      else
-        line.concat ch unless indent && line == "" && /\s/ =~ ch
-        if @ltype != "'" && ch == "#" && peek(0) == "{"
-          identify_string_dvar
-        end
-      end
-    end
-
-    @here_header = true
-    @here_readed.concat reserve
-    while ch = reserve.pop
-      ungetc ch
-    end
-
-    @ltype = ltback
-    @lex_state = EXPR_END
-    Token(Ltype2Token[lt])
-  end
-
-  def identify_quotation
-    ch = getc
-    if lt = PERCENT_LTYPE[ch]
-      ch = getc
-    elsif ch =~ /\W/
-      lt = "\""
-    else
-      RubyLex.fail SyntaxError, "unknown type of %string"
-    end
-    @quoted = ch unless @quoted = PERCENT_PAREN[ch]
-    identify_string(lt, @quoted)
-  end
-
-  def identify_number
-    @lex_state = EXPR_END
-
-    if peek(0) == "0" && peek(1) !~ /[.eE]/
-      getc
-      case peek(0)
-      when /[xX]/
-        ch = getc
-        match = /[0-9a-fA-F_]/
-      when /[bB]/
-        ch = getc
-        match = /[01_]/
-      when /[oO]/
-        ch = getc
-        match = /[0-7_]/
-      when /[dD]/
-        ch = getc
-        match = /[0-9_]/
-      when /[0-7]/
-        match = /[0-7_]/
-      when /[89]/
-        RubyLex.fail SyntaxError, "Invalid octal digit"
-      else
-        return Token(TkINTEGER)
-      end
-
-      len0 = true
-      non_digit = false
-      while ch = getc
-        if match =~ ch
-          if ch == "_"
-            if non_digit
-              RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
-            else
-              non_digit = ch
-            end
-          else
-            non_digit = false
-            len0 = false
-          end
-        else
-          ungetc
-          if len0
-            RubyLex.fail SyntaxError, "numeric literal without digits"
-          end
-          if non_digit
-            RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
-          end
-          break
-        end
-      end
-      return Token(TkINTEGER)
-    end
-
-    type = TkINTEGER
-    allow_point = true
-    allow_e = true
-    non_digit = false
-    while ch = getc
-      case ch
-      when /[0-9]/
-        non_digit = false
-      when "_"
-        non_digit = ch
-      when allow_point && "."
-        if non_digit
-          RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
-        end
-        type = TkFLOAT
-        if peek(0) !~ /[0-9]/
-          type = TkINTEGER
-          ungetc
-          break
-        end
-        allow_point = false
-      when allow_e && "e", allow_e && "E"
-        if non_digit
-          RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
-        end
-        type = TkFLOAT
-        if peek(0) =~ /[+-]/
-          getc
-        end
-        allow_e = false
-        allow_point = false
-        non_digit = ch
-      else
-        if non_digit
-          RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
-        end
-        ungetc
-        break
-      end
-    end
-    Token(type)
-  end
-
-  def identify_string(ltype, quoted = ltype)
-    @ltype = ltype
-    @quoted = quoted
-    subtype = nil
-    begin
-      nest = 0
-      while ch = getc
-        if @quoted == ch and nest == 0
-          break
-        elsif @ltype != "'" && ch == "#" && peek(0) == "{"
-          identify_string_dvar
-        elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
-          subtype = true
-        elsif ch == '\\' and @ltype == "'" #'
-          case ch = getc
-          when "\\", "\n", "'"
-          else
-            ungetc
-          end
-        elsif ch == '\\' #'
-          read_escape
-        end
-        if PERCENT_PAREN.values.include?(@quoted)
-          if PERCENT_PAREN[ch] == @quoted
-            nest += 1
-          elsif ch == @quoted
-            nest -= 1
-          end
-        end
-      end
-      if @ltype == "/"
-        while /[imxoesun]/ =~ peek(0)
-          getc
-        end
-      end
-      if subtype
-        Token(DLtype2Token[ltype])
-      else
-        Token(Ltype2Token[ltype])
-      end
-    ensure
-      @ltype = nil
-      @quoted = nil
-      @lex_state = EXPR_END
-    end
-  end
-
-  def identify_string_dvar
-    begin
-      getc
-
-      reserve_continue = @continue
-      reserve_ltype = @ltype
-      reserve_indent = @indent
-      reserve_indent_stack = @indent_stack
-      reserve_state = @lex_state
-      reserve_quoted = @quoted
-
-      @ltype = nil
-      @quoted = nil
-      @indent = 0
-      @indent_stack = []
-      @lex_state = EXPR_BEG
-
-      loop do
-        @continue = false
-        prompt
-        tk = token
-        if @ltype or @continue or @indent >= 0
-          next
-        end
-        break if tk.kind_of?(TkRBRACE)
-      end
-    ensure
-      @continue = reserve_continue
-      @ltype = reserve_ltype
-      @indent = reserve_indent
-      @indent_stack = reserve_indent_stack
-      @lex_state = reserve_state
-      @quoted = reserve_quoted
-    end
-  end
-
-  def identify_comment
-    @ltype = "#"
-
-    while ch = getc
-      if ch == "\n"
-        @ltype = nil
-        ungetc
-        break
-      end
-    end
-    return Token(TkCOMMENT)
-  end
-
-  def read_escape
-    case ch = getc
-    when "\n", "\r", "\f"
-    when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
-    when /[0-7]/
-      ungetc ch
-      3.times do
-        case ch = getc
-        when /[0-7]/
-        when nil
-          break
-        else
-          ungetc
-          break
-        end
-      end
-
-    when "x"
-      2.times do
-        case ch = getc
-        when /[0-9a-fA-F]/
-        when nil
-          break
-        else
-          ungetc
-          break
-        end
-      end
-
-    when "M"
-      if (ch = getc) != '-'
-        ungetc
-      else
-        if (ch = getc) == "\\" #"
-          read_escape
-        end
-      end
-
-    when "C", "c" #, "^"
-      if ch == "C" and (ch = getc) != "-"
-        ungetc
-      elsif (ch = getc) == "\\" #"
-        read_escape
-      end
-    else
-      # other characters
     end
   end
 end
diff --git a/test/irb/test_ruby-lex.rb b/test/irb/test_ruby-lex.rb
deleted file mode 100644
index b07b4a2eb6..0000000000
--- a/test/irb/test_ruby-lex.rb
+++ /dev/null
@@ -1,108 +0,0 @@
-# frozen_string_literal: false
-require 'test/unit'
-require 'irb/ruby-lex'
-require 'stringio'
-
-module TestIRB
-  class TestRubyLex < Test::Unit::TestCase
-    def setup
-      @scanner = RubyLex.new
-    end
-
-    def teardown
-      RubyLex.debug_level = 0
-    end
-
-    def test_set_input_proc
-      called = false
-      @scanner.set_input(nil) {called = true; nil}
-      @scanner.each_top_level_statement {}
-      assert(called)
-    end
-
-    def test_comment
-      assert_equal([["#\n", 1]], top_level_statement("#\n"))
-    end
-
-    def test_top_level_statement
-      result = top_level_statement("#{<<-"begin;"}#{<<~"end;"}")
-      begin;
-        begin
-        end
-        begin
-        end
-      end;
-      assert_equal([
-                     ["begin\n""end\n", 1],
-                     ["begin\n""end\n", 3],
-                   ],
-                   result)
-    end
-
-    def test_immature_statement
-      src = "if false\n"
-      assert_equal([[src, 1]], top_level_statement(src))
-    end
-
-    def test_prompt
-      prompts = []
-      @scanner.set_prompt {|*a|
-        a << @scanner.instance_variable_get(:@lex_state)
-        unless prompts.last == a
-          prompts << a
-        end
-      }
-      src, lineno = "#{<<-"begin;"}#{<<~'end;'}", __LINE__+1
-      begin;
-        #            #;# LTYPE:INDENT:CONTINUE
-        x            #;# -:0:-
-        x(           #;# -:0:-
-        )            #;# -:1:*
-        a \          #;# -:0:-
-                     #;# -:0:*
-        a;           #;# -:0:-
-        a            #;# -:0:-
-                     #;# -:0:-
-        a            #;# -:0:-
-        a =          #;# -:0:-
-          '          #;# -:0:*
-          '          #;# ':0:*
-        if false or  #;# -:0:-
-          true       #;# -:1:*
-          a          #;# -:1:-
-          "          #;# -:1:-
-          "          #;# ":1:-
-          begin      #;# -:1:-
-            a        #;# -:2:-
-            a        #;# -:2:-
-          end        #;# -:2:-
-        else         #;# -:1:-
-          nil        #;# -:1:-
-        end          #;# -:1:-
-      end;
-      top_level_statement(src.gsub(/[ \t]*#;#.*/, ''))
-      src.each_line.with_index(1) do |line, i|
-        p = prompts.shift
-        next unless /#;#\s*(?:-|(?<ltype>\S)):(?<indent>\d+):(?:(?<cont>\*)|-)(?:.*FIXME:(?<fixme>.*))?/ =~ line
-        indent = indent.to_i
-        cont = (fixme && /`continue'/.match?(fixme)) ^ cont
-        assert_equal([ltype, indent, cont, i], p[0..3], "#{lineno+i}:#{p[4]}: #{line}")
-      end
-    end
-
-    def top_level_statement(lines)
-      input = InputLines.new(lines, "r")
-      scanned = []
-      @scanner.set_input(input)
-      @scanner.each_top_level_statement {|*e|
-        scanned << e
-        yield(*e) if defined?(yield)
-      }
-      scanned
-    end
-
-    class InputLines < StringIO
-      alias encoding external_encoding
-    end
-  end
-end