ruby/lib/rdoc/markup/attribute_manager.rb

# frozen_string_literal: true
##
# Manages changes of attributes in a block of text

class RDoc::Markup::AttributeManager

  ##
  # The NUL character

  NULL = "\000".freeze

  #--
  # We work by substituting non-printing characters in to the text. For now
  # I'm assuming that I can substitute a character in the range 0..8 for a 7
  # bit character without damaging the encoded string, but this might be
  # optimistic
  #++

  A_PROTECT = 004 # :nodoc:

  ##
  # Special mask character to prevent inline markup handling

  PROTECT_ATTR = A_PROTECT.chr # :nodoc:

  ##
  # The attributes enabled for this markup object.

  attr_reader :attributes

  ##
  # This maps delimiters that occur around words (such as *bold* or +tt+)
  # where the start and end delimiters and the same. This lets us optimize
  # the regexp

  attr_reader :matching_word_pairs

  ##
  # And this is used when the delimiters aren't the same. In this case the
  # hash maps a pattern to the attribute character

  attr_reader :word_pair_map

  ##
  # This maps HTML tags to the corresponding attribute char

  attr_reader :html_tags

  ##
  # A \ in front of a character that would normally be processed turns off
  # processing. We do this by turning \< into <#{PROTECT}

  attr_reader :protectable

  ##
  # And this maps _regexp handling_ sequences to a name. A regexp handling
  # sequence is something like a WikiWord

  attr_reader :regexp_handlings

  ##
  # Creates a new attribute manager that understands bold, emphasized and
  # teletype text.

  def initialize
    @html_tags = {}
    @matching_word_pairs = {}
    @protectable = %w[<]
    @regexp_handlings = []
    @word_pair_map = {}
    @attributes = RDoc::Markup::Attributes.new

    add_word_pair "*", "*", :BOLD
    add_word_pair "_", "_", :EM
    add_word_pair "+", "+", :TT

    add_html "em", :EM
    add_html "i",  :EM
    add_html "b",  :BOLD
    add_html "tt",   :TT
    add_html "code", :TT
  end

  ##
  # Return an attribute object with the given turn_on and turn_off bits set

  def attribute(turn_on, turn_off)
    RDoc::Markup::AttrChanger.new turn_on, turn_off
  end

  ##
  # Changes the current attribute from +current+ to +new+

  def change_attribute current, new
    diff = current ^ new
    attribute(new & diff, current & diff)
  end

  ##
  # Used by the tests to change attributes by name from +current_set+ to
  # +new_set+

  def changed_attribute_by_name current_set, new_set
    current = new = 0
    current_set.each do |name|
      current |= @attributes.bitmap_for(name)
    end

    new_set.each do |name|
      new |= @attributes.bitmap_for(name)
    end

    change_attribute(current, new)
  end

  ##
  # Copies +start_pos+ to +end_pos+ from the current string

  def copy_string(start_pos, end_pos)
    res = @str[start_pos...end_pos]
    res.gsub!(/\000/, '')
    res
  end

  ##
  # Map attributes like <b>text</b>to the sequence
  # \001\002<char>\001\003<char>, where <char> is a per-attribute specific
  # character

  def convert_attrs(str, attrs)
    # first do matching ones
    tags = @matching_word_pairs.keys.join("")

    re = /(^|\W)([#{tags}])([#\\]?[\w:.\/-]+?\S?)\2(\W|$)/

    1 while str.gsub!(re) do
      attr = @matching_word_pairs[$2]
      attrs.set_attrs($`.length + $1.length + $2.length, $3.length, attr)
      $1 + NULL * $2.length + $3 + NULL * $2.length + $4
    end

    # then non-matching
    unless @word_pair_map.empty? then
      @word_pair_map.each do |regexp, attr|
        str.gsub!(regexp) {
          attrs.set_attrs($`.length + $1.length, $2.length, attr)
          NULL * $1.length + $2 + NULL * $3.length
        }
      end
    end
  end

  ##
  # Converts HTML tags to RDoc attributes

  def convert_html(str, attrs)
    tags = @html_tags.keys.join '|'

    1 while str.gsub!(/<(#{tags})>(.*?)<\/\1>/i) {
      attr = @html_tags[$1.downcase]
      html_length = $1.length + 2
      seq = NULL * html_length
      attrs.set_attrs($`.length + html_length, $2.length, attr)
      seq + $2 + seq + NULL
    }
  end

  ##
  # Converts regexp handling sequences to RDoc attributes

  def convert_regexp_handlings str, attrs
    @regexp_handlings.each do |regexp, attribute|
      str.scan(regexp) do
        capture = $~.size == 1 ? 0 : 1

        s, e = $~.offset capture

        attrs.set_attrs s, e - s, attribute | @attributes.regexp_handling
      end
    end
  end

  ##
  # Escapes regexp handling sequences of text to prevent conversion to RDoc

  def mask_protected_sequences
    # protect __send__, __FILE__, etc.
    @str.gsub!(/__([a-z]+)__/i,
      "_#{PROTECT_ATTR}_#{PROTECT_ATTR}\\1_#{PROTECT_ATTR}_#{PROTECT_ATTR}")
    @str.gsub!(/(\A|[^\\])\\([#{Regexp.escape @protectable.join}])/m,
               "\\1\\2#{PROTECT_ATTR}")
    @str.gsub!(/\\(\\[#{Regexp.escape @protectable.join}])/m, "\\1")
  end

  ##
  # Unescapes regexp handling sequences of text

  def unmask_protected_sequences
    @str.gsub!(/(.)#{PROTECT_ATTR}/, "\\1\000")
  end

  ##
  # Adds a markup class with +name+ for words wrapped in the +start+ and
  # +stop+ character.  To make words wrapped with "*" bold:
  #
  #   am.add_word_pair '*', '*', :BOLD

  def add_word_pair(start, stop, name)
    raise ArgumentError, "Word flags may not start with '<'" if
      start[0,1] == '<'

    bitmap = @attributes.bitmap_for name

    if start == stop then
      @matching_word_pairs[start] = bitmap
    else
      pattern = /(#{Regexp.escape start})(\S+)(#{Regexp.escape stop})/
      @word_pair_map[pattern] = bitmap
    end

    @protectable << start[0,1]
    @protectable.uniq!
  end

  ##
  # Adds a markup class with +name+ for words surrounded by HTML tag +tag+.
  # To process emphasis tags:
  #
  #   am.add_html 'em', :EM

  def add_html(tag, name)
    @html_tags[tag.downcase] = @attributes.bitmap_for name
  end

  ##
  # Adds a regexp handling for +pattern+ with +name+.  A simple URL handler
  # would be:
  #
  #   @am.add_regexp_handling(/((https?:)\S+\w)/, :HYPERLINK)

  def add_regexp_handling pattern, name
    @regexp_handlings << [pattern, @attributes.bitmap_for(name)]
  end

  ##
  # Processes +str+ converting attributes, HTML and regexp handlings

  def flow str
    @str = str.dup

    mask_protected_sequences

    @attrs = RDoc::Markup::AttrSpan.new @str.length

    convert_attrs            @str, @attrs
    convert_html             @str, @attrs
    convert_regexp_handlings @str, @attrs

    unmask_protected_sequences

    split_into_flow
  end

  ##
  # Debug method that prints a string along with its attributes

  def display_attributes
    puts
    puts @str.tr(NULL, "!")
    bit = 1
    16.times do |bno|
      line = ""
      @str.length.times do |i|
        if (@attrs[i] & bit) == 0
          line << " "
        else
          if bno.zero?
            line << "S"
          else
            line << ("%d" % (bno+1))
          end
        end
      end
      puts(line) unless line =~ /^ *$/
      bit <<= 1
    end
  end

  ##
  # Splits the string into chunks by attribute change

  def split_into_flow
    res = []
    current_attr = 0

    str_len = @str.length

    # skip leading invisible text
    i = 0
    i += 1 while i < str_len and @str[i].chr == "\0"
    start_pos = i

    # then scan the string, chunking it on attribute changes
    while i < str_len
      new_attr = @attrs[i]
      if new_attr != current_attr
        if i > start_pos
          res << copy_string(start_pos, i)
          start_pos = i
        end

        res << change_attribute(current_attr, new_attr)
        current_attr = new_attr

        if (current_attr & @attributes.regexp_handling) != 0 then
          i += 1 while
            i < str_len and (@attrs[i] & @attributes.regexp_handling) != 0

          res << RDoc::Markup::RegexpHandling.new(current_attr,
                                                  copy_string(start_pos, i))
          start_pos = i
          next
        end
      end

      # move on, skipping any invisible characters
      begin
        i += 1
      end while i < str_len and @str[i].chr == "\0"
    end

    # tidy up trailing text
    if start_pos < str_len
      res << copy_string(start_pos, str_len)
    end

    # and reset to all attributes off
    res << change_attribute(current_attr, 0) if current_attr != 0

    res
  end

end