зеркало из https://github.com/mozilla/gecko-dev.git
Add code comments from bug 242315 comment 17. r+sr=bzbarsky.
This commit is contained in:
Родитель
10f466a6e8
Коммит
bd087f358d
|
@@ -207,7 +207,7 @@ class ConvertUTF8toUTF16
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A character sink (see |copy_string| in nsAlgorithm.h) for computing
|
* A character sink (see |copy_string| in nsAlgorithm.h) for computing
|
||||||
* the length of a UTF-8 string.
|
* the length of the UTF-16 string equivalent to a UTF-8 string.
|
||||||
*/
|
*/
|
||||||
class CalculateUTF8Length
|
class CalculateUTF8Length
|
||||||
{
|
{
|
||||||
|
@@ -238,6 +238,16 @@ class CalculateUTF8Length
|
||||||
p += 3;
|
p += 3;
|
||||||
else if ( UTF8traits::is4byte(*p) ) {
|
else if ( UTF8traits::is4byte(*p) ) {
|
||||||
p += 4;
|
p += 4;
|
||||||
|
// Because a UTF-8 sequence of 4 bytes represents a codepoint
|
||||||
|
// greater than 0xFFFF, it will become a surrogate pair in the
|
||||||
|
// UTF-16 string, so add 1 more to mLength.
|
||||||
|
// This doesn't happen with is5byte and is6byte because they
|
||||||
|
// are illegal UTF-8 sequences (greater than 0x10FFFF) so get
|
||||||
|
// converted to a single replacement character.
|
||||||
|
//
|
||||||
|
// XXX: if the 4-byte sequence is an illegal non-shortest form,
|
||||||
|
// it also gets converted to a replacement character, so
|
||||||
|
// mLength will be off by one in this case.
|
||||||
++mLength;
|
++mLength;
|
||||||
}
|
}
|
||||||
else if ( UTF8traits::is5byte(*p) )
|
else if ( UTF8traits::is5byte(*p) )
|
||||||
|
|
Загрузка…
Ссылка в новой задаче