collations: fix sorting in UCA900 collations (#12555) (#12562)

* collations: fix sorting in UCA900 collations

When using the fast iterator to _compare_ two strings with a UCA
collation, we need to keep in mind that the weights in the collation are
stored in BIG ENDIAN byte order (this is the output format for the weight
strings, so we store the weights this way), so comparing them directly
will not result in the proper collation order. They need to be
byte-swapped before they can be compared with an arithmetic operation!



* collations: comment



---------

Signed-off-by: Vicent Marti <vmg@strn.cat>
This commit is contained in:
Vicent Martí 2023-03-08 11:38:14 +01:00 коммит произвёл GitHub
Родитель af42116c8a
Коммит 01e4d8e364
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
1 изменённых файлов: 10 добавлений и 6 удалений

Просмотреть файл

@@ -66,7 +66,7 @@ func (it *FastIterator900) FastForward32(it2 *FastIterator900) int {
p1 := it.input
p2 := it2.input
var w1, w2 uint32
var w1, w2 uint16
for len(p1) >= 4 && len(p2) >= 4 {
dword1 := *(*uint32)(unsafe.Pointer(&p1[0]))
@@ -75,17 +75,20 @@ func (it *FastIterator900) FastForward32(it2 *FastIterator900) int {
if nonascii == 0 {
if dword1 != dword2 {
// Use the weight string fast tables for quick weight comparisons;
// see (*FastIterator900).NextWeightBlock64 for a description of
// the table format
table := it.fastTable
if w1, w2 = table[p1[0]], table[p2[0]]; w1 != w2 {
if w1, w2 = uint16(table[p1[0]]), uint16(table[p2[0]]); w1 != w2 {
goto mismatch
}
if w1, w2 = table[p1[1]], table[p2[1]]; w1 != w2 {
if w1, w2 = uint16(table[p1[1]]), uint16(table[p2[1]]); w1 != w2 {
goto mismatch
}
if w1, w2 = table[p1[2]], table[p2[2]]; w1 != w2 {
if w1, w2 = uint16(table[p1[2]]), uint16(table[p2[2]]); w1 != w2 {
goto mismatch
}
if w1, w2 = table[p1[3]], table[p2[3]]; w1 != w2 {
if w1, w2 = uint16(table[p1[3]]), uint16(table[p2[3]]); w1 != w2 {
goto mismatch
}
}
@@ -114,7 +117,8 @@ mismatch:
it.unicode++
return 0
}
return int(w1) - int(w2)
// The weights must be byte-swapped before comparison because they're stored in big endian
return int(bits.ReverseBytes16(w1)) - int(bits.ReverseBytes16(w2))
}
// NextWeightBlock64 takes a byte slice of 16 bytes and fills it with the next