2020-05-23 00:17:10 +03:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2019-10-29 17:08:37 +03:00
|
|
|
module DidYouMean
  # Spell checker for a dictionary that has a tree structure, i.e. whose
  # entries are paths made of segments joined by a separator
  # (see doc/tree_spell_checker_api.md).
  #
  # Every segment of the input except the last one is corrected against the
  # segments found at the same depth in the dictionary; the last segment (the
  # leaf) is then corrected against the leaves found under each plausible path.
  class TreeSpellChecker
    attr_reader :dictionary, :separator, :augment

    # dictionary - Array of String paths.
    # separator  - String that joins the segments of a path.
    # augment    - when truthy, #correct falls back to a plain
    #              DidYouMean::SpellChecker over the whole dictionary whenever
    #              the tree-based search yields nothing.
    def initialize(dictionary:, separator: '/', augment: nil)
      @dictionary = dictionary
      @separator = separator
      @augment = augment
    end

    # Returns an Array of suggested dictionary paths for +input+.
    # The result is empty when nothing plausible is found and +augment+ is
    # not set.
    def correct(input)
      plausibles = plausible_dimensions(input)
      return fall_back_to_normal_spell_check(input) if plausibles.empty?

      suggestions = find_suggestions(input, plausibles)
      return fall_back_to_normal_spell_check(input) if suggestions.empty?

      suggestions
    end

    # Unique directory parts of the dictionary: every entry split on the
    # separator with its last segment dropped. Memoized.
    def dictionary_without_leaves
      @dictionary_without_leaves ||= dictionary.map { |word| word.split(separator)[0..-2] }.uniq
    end

    # Number of segments in the deepest directory part. Memoized.
    # An empty dictionary has depth 0 instead of raising on a nil maximum.
    def tree_depth
      @tree_depth ||= dictionary_without_leaves.map(&:size).max || 0
    end

    # For each depth index, the unique segments that occur at that depth
    # across all directory parts. Memoized.
    def dimensions
      @dimensions ||= tree_depth.times.map do |index|
        # Shallower paths have no segment at this depth, hence the compact.
        dictionary_without_leaves.map { |element| element[index] }.compact.uniq
      end
    end

    # Returns what follows "#{path}#{separator}" for every dictionary entry
    # that starts with it. Only a leading match counts, and only that leading
    # occurrence is stripped, so an entry such as 'ab/y' is not treated as
    # living under path 'b'.
    def find_leaves(path)
      path_with_separator = "#{path}#{separator}"
      prefix_length = path_with_separator.length

      dictionary
        .select { |str| str.start_with?(path_with_separator) }
        .map { |str| str[prefix_length..-1] }
    end

    # Corrects every non-leaf segment of +input+ against the dictionary
    # segments found at the same depth. Returns one Array of candidate
    # segments per depth; segments deeper than the tree are dropped.
    def plausible_dimensions(input)
      input.split(separator)[0..-2]
        .map
        .with_index { |element, index| correct_element(dimensions[index], element) if dimensions[index] }
        .compact
    end

    # Joins each state (an Array of segments) into a path String.
    def possible_paths(states)
      states.map { |state| state.join(separator) }
    end

    private

    # Builds every combination of the plausible segments, turns them into
    # paths and looks for the leaf of +input+ under each of them.
    def find_suggestions(input, plausibles)
      states = plausibles[0].product(*plausibles[1..-1])
      paths  = possible_paths(states)
      leaf   = input.split(separator).last

      find_ideas(paths, leaf)
    end

    # Plain spell check of +input+ against the whole dictionary; only
    # performed when +augment+ is truthy, otherwise returns [].
    def fall_back_to_normal_spell_check(input)
      return [] unless augment

      ::DidYouMean::SpellChecker.new(dictionary: dictionary).correct(input)
    end

    # Collects the full-path suggestions for +leaf+ under every path.
    def find_ideas(paths, leaf)
      # Normalize once up front: ideas_to_paths compares the normalized leaf
      # against the names, and normalize no longer edits the string in place.
      leaf = normalize(leaf)

      paths.flat_map do |path|
        names = find_leaves(path)
        ideas = correct_element(names, leaf)

        ideas_to_paths(ideas, leaf, names, path)
      end.compact # paths without any idea contribute nil
    end

    # Prefixes the ideas with +path+. Returns nil when there are no ideas and
    # only the exact path when +leaf+ itself is one of the names.
    def ideas_to_paths(ideas, leaf, names, path)
      if ideas.empty?
        nil
      elsif names.include?(leaf)
        ["#{path}#{separator}#{leaf}"]
      else
        ideas.map { |str| "#{path}#{separator}#{str}" }
      end
    end

    # Corrects a single segment against the candidate +names+.
    # A lone candidate is returned as is, without looking at +element+; an
    # exact match of the normalized element is returned on its own; otherwise
    # the candidates are spell checked with DidYouMean::SpellChecker.
    def correct_element(names, element)
      return names if names.size == 1

      str = normalize(element)

      return [str] if names.include?(str)

      ::DidYouMean::SpellChecker.new(dictionary: names).correct(str)
    end

    # Returns a lower-cased copy of +str+ with every '@' replaced by a space.
    # The argument itself is left untouched.
    def normalize(str)
      str.downcase.tr('@', ' ')
    end
  end
end
|