Bug 754824 - The highlight is off by a few characters in the search result view when some characters are UTF8 encoded on 4 bytes - Follow-up to only match UTF-16 surrogate halves, r=asuth.

This commit is contained in:
Florian Quèze 2012-06-28 16:48:14 +02:00
Родитель f2bd2e409e
Коммит f7e759c416
1 изменённых файлов: 8 добавлений и 5 удалений

Просмотреть файл

@@ -834,11 +834,14 @@ ircAccount.prototype = {
// Count the number of bytes in a UTF-8 encoded string.
function charCodeToByteCount(c) {
  // |c| is a UTF-16 code unit (as returned by String.prototype.charCodeAt).
  // Returns the number of bytes it contributes to the UTF-8 encoding:
  //  - code units below U+0080 are stored on 1 byte,
  //  - code units below U+0800 are stored on 2 bytes,
  //  - code units U+D800 through U+DFFF are UTF-16 surrogate halves: JS
  //    splits a character stored on 4 bytes in UTF-8 into two such halves,
  //    so each half is counted as 2 bytes,
  //  - all other code units are stored on 3 bytes.
  if (c < 0x80)
    return 1;
  // Only surrogate halves count as 2 bytes above U+07FF. Testing c >= 0x8000
  // instead would under-count every 3 byte character from U+8000 to U+FFFF.
  if (c < 0x800 || (c >= 0xD800 && c <= 0xDFFF))
    return 2;
  return 3;
}
let bytes = 0;
for (let i = 0; i < aStr.length; i++)