# Mirror of https://github.com/github/ruby.git
# 85 lines, 1.8 KiB, Ruby
module DidYouMean
  # Jaro string similarity.
  #
  # Although the entry point is called +distance+, the value it returns is a
  # similarity score: identical strings score 1.0 and strings with no matching
  # characters score 0.
  module Jaro
    module_function

    # Computes the Jaro similarity of two strings, compared codepoint by
    # codepoint. The result does not depend on the argument order.
    #
    # @param str1 [String] first string
    # @param str2 [String] second string
    # @return [Numeric] Integer 0 when no characters match, otherwise a Float
    #   that is 1.0 for identical strings
    def distance(str1, str2)
      # Normalize so that str1 is never the longer string.
      str1, str2 = str2, str1 if str1.length > str2.length
      length1, length2 = str1.length, str2.length

      # m and t are Floats so the final divisions are not integer divisions.
      m = 0.0 # number of matching characters
      t = 0.0 # number of matched characters that are out of order
      # How far apart two equal characters may be and still count as a match.
      range = length2 > 3 ? length2 / 2 - 1 : 0
      # Integers used as bitsets: bit n is set once position n has been matched.
      flags1 = 0
      flags2 = 0

      # Avoid duplicating enumerable objects
      str1_codepoints = str1.codepoints
      str2_codepoints = str2.codepoints

      # Pass 1: pair each character of str1 with the first unmatched, equal
      # character of str2 that lies inside the search window.
      i = 0
      while i < length1
        last = i + range
        # Positions past the end of str2 can never match, so do not probe them.
        last = length2 - 1 if last >= length2
        j = i >= range ? i - range : 0

        while j <= last
          if flags2[j] == 0 && str1_codepoints[i] == str2_codepoints[j]
            flags2 |= (1 << j)
            flags1 |= (1 << i)
            m += 1
            break
          end

          j += 1
        end

        i += 1
      end

      # Pass 2: walk the matched characters of both strings in order and count
      # the positions where they disagree.
      k = i = 0
      while i < length1
        if flags1[i] != 0
          # Pass 1 sets one bit in flags2 for every bit it sets in flags1, so a
          # matched position at or after k always exists.
          k += 1 while k < length2 && flags2[k] == 0

          t += 1 if str1_codepoints[i] != str2_codepoints[k]
          k += 1
        end

        i += 1
      end
      # Each transposition was counted once from each side.
      t = (t / 2).floor

      m == 0 ? 0 : (m / length1 + m / length2 + (m - t) / m) / 3
    end
  end

  # Jaro-Winkler string similarity: the Jaro score, boosted for strings that
  # share a common prefix.
  module JaroWinkler
    # Scaling factor applied per matching prefix character.
    WEIGHT = 0.1
    # The prefix bonus is applied only when the Jaro score exceeds this value.
    THRESHOLD = 0.7

    module_function

    # Computes the Jaro-Winkler similarity of two strings.
    #
    # @param str1 [String] first string
    # @param str2 [String] second string
    # @return [Numeric] the plain Jaro score when it is at or below THRESHOLD,
    #   otherwise the Jaro score plus the prefix bonus
    def distance(str1, str2)
      jaro_distance = Jaro.distance(str1, str2)

      if jaro_distance > THRESHOLD
        codepoints2 = str2.codepoints
        prefix_bonus = 0

        # Count the length of the common prefix, capped at 4 characters.
        str1.each_codepoint do |char1|
          break unless prefix_bonus < 4 && char1 == codepoints2[prefix_bonus]

          prefix_bonus += 1
        end

        jaro_distance + (prefix_bonus * WEIGHT * (1 - jaro_distance))
      else
        jaro_distance
      end
    end
  end
end