Fix UTF-16 LE and BE handling.

Thanks to John Lenton for the test cases.
This commit is contained in:
Gustavo Niemeyer 2015-12-01 13:41:20 -02:00
Родитель 53feefa255
Коммит bd61a856f8
2 изменённых файла: 17 добавлений и 2 удаления

Просмотреть файл

@@ -559,6 +559,18 @@ var unmarshalTests = []struct {
"a: []", "a: []",
&struct{ A []int }{[]int{}}, &struct{ A []int }{[]int{}},
}, },
// UTF-16-LE
{
"\xff\xfe\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n\x00",
M{"ñoño":"very yes"},
},
// UTF-16-BE
{
"\xfe\xff\x00\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n",
M{"ñoño":"very yes"},
},
} }
type M map[interface{}]interface{} type M map[interface{}]interface{}

Просмотреть файл

@@ -247,7 +247,7 @@ func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool {
if parser.encoding == yaml_UTF16LE_ENCODING { if parser.encoding == yaml_UTF16LE_ENCODING {
low, high = 0, 1 low, high = 0, 1
} else { } else {
high, low = 1, 0 low, high = 1, 0
} }
// The UTF-16 encoding is not as simple as one might // The UTF-16 encoding is not as simple as one might
@@ -357,23 +357,26 @@ func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool {
if value <= 0x7F { if value <= 0x7F {
// 0000 0000-0000 007F . 0xxxxxxx // 0000 0000-0000 007F . 0xxxxxxx
parser.buffer[buffer_len+0] = byte(value) parser.buffer[buffer_len+0] = byte(value)
buffer_len += 1
} else if value <= 0x7FF { } else if value <= 0x7FF {
// 0000 0080-0000 07FF . 110xxxxx 10xxxxxx // 0000 0080-0000 07FF . 110xxxxx 10xxxxxx
parser.buffer[buffer_len+0] = byte(0xC0 + (value >> 6)) parser.buffer[buffer_len+0] = byte(0xC0 + (value >> 6))
parser.buffer[buffer_len+1] = byte(0x80 + (value & 0x3F)) parser.buffer[buffer_len+1] = byte(0x80 + (value & 0x3F))
buffer_len += 2
} else if value <= 0xFFFF { } else if value <= 0xFFFF {
// 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx // 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx
parser.buffer[buffer_len+0] = byte(0xE0 + (value >> 12)) parser.buffer[buffer_len+0] = byte(0xE0 + (value >> 12))
parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 6) & 0x3F)) parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 6) & 0x3F))
parser.buffer[buffer_len+2] = byte(0x80 + (value & 0x3F)) parser.buffer[buffer_len+2] = byte(0x80 + (value & 0x3F))
buffer_len += 3
} else { } else {
// 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx // 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
parser.buffer[buffer_len+0] = byte(0xF0 + (value >> 18)) parser.buffer[buffer_len+0] = byte(0xF0 + (value >> 18))
parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 12) & 0x3F)) parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 12) & 0x3F))
parser.buffer[buffer_len+2] = byte(0x80 + ((value >> 6) & 0x3F)) parser.buffer[buffer_len+2] = byte(0x80 + ((value >> 6) & 0x3F))
parser.buffer[buffer_len+3] = byte(0x80 + (value & 0x3F)) parser.buffer[buffer_len+3] = byte(0x80 + (value & 0x3F))
buffer_len += 4
} }
buffer_len += width
parser.unread++ parser.unread++
} }