Bug 1471884 [wpt PR 11707] - Consolidated fixes to character encoders/decoders, a=testonly

Automatic update from web-platform-tests
Consolidated fixes to character encoders/decoders (#11707)

* Fix EUC-JP reference decoder

The relevant step addressed by this pull request says "Return error."; thus, after emitting the error, the decoder should continue with the next iteration rather than run the rest of the handler for the given byte.

* Fix ISO-2022-JP reference implementation

The relevant step in the Encoding Standard says "Prepend _lead_ and _byte_ to _stream_." However, the two bytes are prepended in the wrong order in the reference implementation. Note that under the Encoding Standard, "[w]hen one or more tokens are prepended to a stream, those tokens must be inserted, _in given order_, before the first token in the stream." (Note that the code, at the time of this request, moves _lead_ to the front of the array, then moves _byte_ to the front of the array, which leaves _byte_ ahead of _lead_ instead of after it.)

There may be other issues like this elsewhere in the multiple-byte encoder reference implementations.

* Fix bug in EUC-KR reference implementation

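Fixes one "if" condition that was always true (`byte >= 0x41 || byte <= 0xfe`) by changing `||` to `&&`, so that the pointer is computed only when the byte is in the range 0x41 to 0xFE.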

* Fix bug in Shift_JIS encoder

* Fix bug in Big5 reference implementation

* Fix Shift_JIS reference decoder

Updates the decoder to conform to the most recent Encoding Standard.

--

wpt-commits: 0589266b84851660a1c70efb916f5d34fa5ebd7c
wpt-pr: 11707
This commit is contained in:
Peter Occil 2018-07-30 14:56:06 +00:00 коммит произвёл moz-wptsync-bot
Родитель a37f593c10
Коммит faee564332
7 изменённых файлов: 25 добавлений и 24 удалений

Просмотреть файл

@ -569073,7 +569073,7 @@
"testharness"
],
"encoding/legacy-mb-japanese/euc-jp/eucjp-decoder.js": [
"3bd6d8db793696c1827c31990e3e40e5181d9cac",
"f0c9547639a3ab41a4b956c1d2afa2b1c0f27272",
"support"
],
"encoding/legacy-mb-japanese/euc-jp/eucjp-encode-form-cseucpkdfmtjapanese.html": [
@ -569213,7 +569213,7 @@
"testharness"
],
"encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-decoder.js": [
"3f9d6377755867c9b9b7d05ccaa88f459d0ca436",
"09c51e38cafd3c0a7a506d8ffa3fa96700576fb3",
"support"
],
"encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encode-form-csiso2022jp.html": [
@ -569365,7 +569365,7 @@
"testharness"
],
"encoding/legacy-mb-japanese/shift_jis/sjis-decoder.js": [
"b62da9bead74bfa85fc74022aafbe5a56870e49d",
"290e214851483bd34f3c876271b3d42f1613603a",
"support"
],
"encoding/legacy-mb-japanese/shift_jis/sjis-encode-form-csshiftjis.html": [
@ -569489,7 +569489,7 @@
"support"
],
"encoding/legacy-mb-japanese/shift_jis/sjis-encoder.js": [
"16f0b721a8c8a5609e31725359d8d44ea566fb69",
"9383879790108e834d7e81c88a97ca86c2dad956",
"support"
],
"encoding/legacy-mb-japanese/shift_jis/sjis_chars-csshiftjis.html": [
@ -569609,7 +569609,7 @@
"testharness"
],
"encoding/legacy-mb-korean/euc-kr/euckr-decoder.js": [
"ee317acdc4c6ad8a737ac9f47a292015b0fab081",
"ac458864fe09dac0f63cf18e86db0f3e5a88c2bf",
"support"
],
"encoding/legacy-mb-korean/euc-kr/euckr-encode-form-cseuckr.html": [
@ -569857,7 +569857,7 @@
"testharness"
],
"encoding/legacy-mb-tchinese/big5/big5-decoder.js": [
"895fb1caf07ea90bea9d34db70b0974f11d6c149",
"dc90defd4da64abac6efbd1abf0aeadb9aef2b7b",
"support"
],
"encoding/legacy-mb-tchinese/big5/big5-enc-ascii.html": [

Просмотреть файл

@ -16,8 +16,9 @@ function eucjpDecoder(stream) {
stream = stream.replace(/%/g, " ");
stream = stream.replace(/[\s]+/g, " ").trim();
var bytes = stream.split(" ");
for (i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
for (var i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
var out = "";
var lead, byte, offset, ptr, cp;
var jis0212flag = false;
var eucjpLead = 0x00;
@ -68,6 +69,7 @@ function eucjpDecoder(stream) {
}
if (byte >= 0x00 && byte <= 0x7f) bytes.unshift(byte);
out += "<22>";
continue;
}
if (byte >= 0x00 && byte <= 0x7f) {
out += dec2char(byte);

Просмотреть файл

@ -25,7 +25,7 @@ function iso2022jpDecoder(stream) {
stream = stream.replace(/%/g, " ");
stream = stream.replace(/[\s]+/g, " ").trim();
var bytes = stream.split(" ");
for (i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
for (var i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
var endofstream = 2000000;
//bytes.push(endofstream)
var out = "";
@ -193,8 +193,8 @@ function iso2022jpDecoder(stream) {
continue;
}
}
bytes.unshift(lead);
bytes.unshift(byte);
// Prepend the sequence (lead, byte) to the stream
bytes.unshift(lead, byte);
outFlag = false;
decState = outState;
out += "<22>";

Просмотреть файл

@ -16,7 +16,7 @@ function sjisDecoder(stream) {
stream = stream.replace(/%/g, " ");
stream = stream.replace(/[\s]+/g, " ").trim();
var bytes = stream.split(" ");
for (i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
for (var i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
var out = "";
var lead, byte, leadoffset, offset, ptr, cp;
var sjisLead = 0x00;
@ -46,7 +46,7 @@ function sjisDecoder(stream) {
else leadoffset = 0xc1;
if ((byte >= 0x40 && byte <= 0x7e) || (byte >= 0x80 && byte <= 0xfc))
ptr = (lead - leadoffset) * 188 + byte - offset;
if (cp == null && ptr >= 8836 && ptr <= 10528) {
if (ptr != null && ptr >= 8836 && ptr <= 10715) {
out += dec2char(0xe000 + ptr - 8836);
continue;
}

Просмотреть файл

@ -50,7 +50,7 @@ function sjisEncoder(stream) {
var cp;
var finished = false;
var endofstream = 2000000;
var temp, offset, leadoffset, first, second;
while (!finished) {
if (cps.length == 0) cp = endofstream;
else cp = cps.shift();
@ -73,7 +73,7 @@ function sjisEncoder(stream) {
}
if (cp >= 0xff61 && cp <= 0xff9f) {
temp = cp - 0xff61 + 0xa1;
out += temp.toString(16).toUpperCase();
out += " " + temp.toString(16).toUpperCase();
continue;
}
if (cp == 0x2212) {

Просмотреть файл

@ -16,7 +16,7 @@ function euckrDecoder(stream) {
stream = stream.replace(/%/g, " ");
stream = stream.replace(/[\s]+/g, " ").trim();
var bytes = stream.split(" ");
for (i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
for (var i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
var out = "";
var lead, byte, offset, ptr, cp;
var euckrLead = 0x00;
@ -41,7 +41,7 @@ function euckrDecoder(stream) {
lead = euckrLead;
ptr = null;
euckrLead = 0x00;
if (byte >= 0x41 || byte <= 0xfe)
if (byte >= 0x41 && byte <= 0xfe)
ptr = (lead - 0x81) * 190 + (byte - 0x41);
if (ptr == null) cp = null;
else cp = euckr[ptr];

Просмотреть файл

@ -16,7 +16,7 @@ function big5Decoder(stream) {
stream = stream.replace(/%/g, " ");
stream = stream.replace(/[\s]+/g, " ").trim();
var bytes = stream.split(" ");
for (i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
for (var i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
var out = "";
var lead, byte, offset, ptr, cp;
var big5lead = 0x00;
@ -47,24 +47,23 @@ function big5Decoder(stream) {
ptr = (lead - 0x81) * 157 + (byte - offset);
// "If there is a row in the table below whose first column is pointer, return the two code points listed in its second column"
switch (ptr) {
case "1133":
case 1133:
out += "Ê̄";
continue;
case "1135":
case 1135:
out += "Ê̌";
continue;
case "1164":
case 1164:
out += "ê̄";
continue;
case "1166":
case 1166:
out += "ê̌";
continue;
}
if (ptr == null) cp = null;
else cp = big5[ptr];
if (cp == null && byte >= 0x00 && byte < 0x7f) {
if (cp == null && byte >= 0x00 && byte <= 0x7f) {
bytes.unshift(byte);
continue;
}
if (cp == null) {
out += "<22>";
@ -73,7 +72,7 @@ function big5Decoder(stream) {
out += dec2char(cp);
continue;
}
if (byte >= 0x00 && byte < 0x7f) {
if (byte >= 0x00 && byte <= 0x7f) {
out += dec2char(byte);
continue;
}