Add code comments from bug 242315 comment 17. r+sr=bzbarsky.

This commit is contained in:
smontagu%smontagu.org 2004-05-07 22:21:26 +00:00
Родитель 10f466a6e8
Коммит bd087f358d
1 изменённых файлов: 11 добавлений и 1 удалений

Просмотреть файл

@ -207,7 +207,7 @@ class ConvertUTF8toUTF16
/** /**
* A character sink (see |copy_string| in nsAlgorithm.h) for computing * A character sink (see |copy_string| in nsAlgorithm.h) for computing
* the length of a UTF-8 string. * the length of the UTF-16 string equivalent to a UTF-8 string.
*/ */
class CalculateUTF8Length class CalculateUTF8Length
{ {
@ -238,6 +238,16 @@ class CalculateUTF8Length
p += 3; p += 3;
else if ( UTF8traits::is4byte(*p) ) { else if ( UTF8traits::is4byte(*p) ) {
p += 4; p += 4;
// Because a UTF-8 sequence of 4 bytes represents a codepoint
// greater than 0xFFFF, it will become a surrogate pair in the
// UTF-16 string, so add 1 more to mLength.
// This doesn't happen with is5byte and is6byte because they
// are illegal UTF-8 sequences (greater than 0x10FFFF), so they
// get converted to a single replacement character.
//
// XXX: if the 4-byte sequence is an illegal non-shortest form,
// it also gets converted to a replacement character, so
// mLength will be off by one in this case.
++mLength; ++mLength;
} }
else if ( UTF8traits::is5byte(*p) ) else if ( UTF8traits::is5byte(*p) )