Bug 1495067 - Make HasRTLChars() consider Hebrew presentation forms as RTL (again) and not consider U+FEFF as RTL (again). r=jfkthame

* Update encoding_rs to 0.8.8. * Change U+FEFD and U+FEFE to RTL in IS_RTL_PRESENTATION_FORM to make the Rust and C++ code agree on what's RTL. MozReview-Commit-ID: CuK6fN4pojG Differential Revision: https://phabricator.services.mozilla.com/D7285 --HG-- extra : moz-landing-system : lando
2018-10-02 14:24:15 +00:00 · 2018-10-02 14:24:15 +00:00 · b8545f2cf8
--- a/Cargo.lock
+++ b/Cargo.lock
@ -726,21 +726,21 @@ name = "encoding_c"
 version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "encoding_rs 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.8.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
 name = "encoding_glue"
 version = "0.1.0"
 dependencies = [
- "encoding_rs 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.8.8 (registry+https://github.com/rust-lang/crates.io-index)",
 "nserror 0.1.0",
 "nsstring 0.1.0",
 ]

 [[package]]
 name = "encoding_rs"
-version = "0.8.7"
+version = "0.8.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1604,7 +1604,7 @@ name = "nsstring"
 version = "0.1.0"
 dependencies = [
 "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
- "encoding_rs 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.8.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
@ -3069,7 +3069,7 @@ dependencies = [
 "checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a"
 "checksum ena 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cabe5a5078ac8c506d3e4430763b1ba9b609b1286913e7d08e581d1c2de9b7e5"
 "checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee"
-"checksum encoding_rs 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)" = "21a550ec129ca2f8593227888625c7c5331c6ad878e2cee6b7ac25e1c7d05746"
+"checksum encoding_rs 0.8.8 (registry+https://github.com/rust-lang/crates.io-index)" = "cc9945e460ad969220c1061b9574fb02ed097c6f0704ce2f3e336cb443c40c73"
 "checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad"
 "checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3"
 "checksum euclid 0.19.0 (registry+https://github.com/rust-lang/crates.io-index)" = "70a2ebdf55fb9d6329046e026329a55ef8fbaae5ea833f56e170beb3125a8a5f"
--- a/intl/unicharutil/util/nsBidiUtils.h
+++ b/intl/unicharutil/util/nsBidiUtils.h
@ -265,7 +265,7 @@ typedef enum nsCharType nsCharType;

 #define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff))
 #define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \
-                                     ((0xfe70 <= (c)) && ((c) <= 0xfefc)))
+                                     ((0xfe70 <= (c)) && ((c) <= 0xfefe)))
 #define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
                                ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
 // Due to the supplementary-plane RTL blocks being identifiable from the
--- a/testing/web-platform/meta/css/selectors/floating-first-letter-05d0.html.ini
+++ b/testing/web-platform/meta/css/selectors/floating-first-letter-05d0.html.ini
@ -0,0 +1,5 @@
+[floating-first-letter-05d0.html]
+    type: reftest
+    disabled: https://bugzilla.mozilla.org/show_bug.cgi?id=1495674
+    expected:
+        FAIL
--- a/testing/web-platform/tests/css/selectors/floating-first-letter-05d0.html
+++ b/testing/web-platform/tests/css/selectors/floating-first-letter-05d0.html
@ -0,0 +1,20 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>Drop cap with U+05D0 in the document</title>
+<meta name="assert" content="The text placement within :first-line should not be affected by later presence of a right-to-left character.">
+<link rel=help href=https://drafts.csswg.org/css-pseudo-4/#first-line-styling>
+<link rel=match href=/css/selectors/floating-first-letter-ref.html>
+<style>
+  p:first-line {
+    background: lightblue;
+  }
+
+  p::first-letter {
+    float: left;
+    font-size: 4rem;
+  }
+  div {
+  	color: transparent;
+  }
+</style>
+<p>Ab</p><div>&#x05D0;</div>
--- a/testing/web-platform/tests/css/selectors/floating-first-letter-feff.html
+++ b/testing/web-platform/tests/css/selectors/floating-first-letter-feff.html
@ -0,0 +1,17 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>Drop cap with U+FEFF in the document</title>
+<meta name="assert" content="The text placement within :first-line should not be affected by later presence of U+FEFF.">
+<link rel=help href=https://drafts.csswg.org/css-pseudo-4/#first-line-styling>
+<link rel=match href=/css/selectors/floating-first-letter-ref.html>
+<style>
+  p:first-line {
+    background: lightblue;
+  }
+
+  p::first-letter {
+    float: left;
+    font-size: 4rem;
+  }
+</style>
+<p>Ab</p>&#xFEFF;
--- a/testing/web-platform/tests/css/selectors/floating-first-letter-ref.html
+++ b/testing/web-platform/tests/css/selectors/floating-first-letter-ref.html
@ -0,0 +1,14 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>Drop cap with no bidi in the document</title>
+<style>
+  p:first-line {
+    background: lightblue;
+  }
+
+  p::first-letter {
+    float: left;
+    font-size: 4rem;
+  }
+</style>
+<p>Ab</p>
--- a/third_party/rust/encoding_rs/.cargo-checksum.json
+++ b/third_party/rust/encoding_rs/.cargo-checksum.json
--- a/third_party/rust/encoding_rs/.cargo_vcs_info.json
+++ b/third_party/rust/encoding_rs/.cargo_vcs_info.json
@ -1,5 +1,5 @@
 {
  "git": {
-    "sha1": "b67c60025bfebbf186e8b22f03edc9b6dc96df59"
+    "sha1": "ffe863483bcfeb069edf645738ac3650899d2801"
  }
 }
--- a/third_party/rust/encoding_rs/Cargo.toml
+++ b/third_party/rust/encoding_rs/Cargo.toml
@ -12,7 +12,7 @@

 [package]
 name = "encoding_rs"
-version = "0.8.7"
+version = "0.8.8"
 authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
 description = "A Gecko-oriented implementation of the Encoding Standard"
 homepage = "https://docs.rs/encoding_rs/"
--- a/third_party/rust/encoding_rs/README.md
+++ b/third_party/rust/encoding_rs/README.md
@ -244,6 +244,14 @@ used in Firefox.

 ## Release Notes

+### 0.8.8
+
+* Made the `is_foo_bidi()` functions not treat U+FEFF (ZERO WIDTH NO-BREAK
+  SPACE, a.k.a. BYTE ORDER MARK) as right-to-left.
+* Made the `is_foo_bidi()` functions report `true` if the input contains
+  Hebrew presentation forms (which are right-to-left but not in a
+  right-to-left-roadmapped block).
+
 ### 0.8.7

 * Fixed a panic in the UTF-16LE/UTF-16BE decoder when decoding to UTF-8.
--- a/third_party/rust/encoding_rs/src/lib.rs
+++ b/third_party/rust/encoding_rs/src/lib.rs
@ -8,7 +8,7 @@
 // except according to those terms.

 #![cfg_attr(feature = "cargo-clippy", allow(doc_markdown, inline_always, new_ret_no_self))]
-#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.7")]
+#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.8")]

 //! encoding_rs is a Gecko-oriented Free Software / Open Source implementation
 //! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust.
--- a/third_party/rust/encoding_rs/src/mem.rs
+++ b/third_party/rust/encoding_rs/src/mem.rs
@ -669,11 +669,14 @@ pub fn is_utf16_latin1(buffer: &[u16]) -> bool {
 /// that trigger right-to-left processing.
 ///
 /// The check is done on a Unicode block basis without regard to assigned
-/// vs. unassigned code points in the block. Additionally, the four
-/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
-/// Control characters that are technically bidi controls but do not cause
-/// right-to-left behavior without the presence of right-to-left characters
-/// or right-to-left controls are not checked for.
+/// vs. unassigned code points in the block. Hebrew presentation forms in
+/// the Alphabetic Presentation Forms block are treated as if they formed
+/// a block on their own (i.e. they are treated as right-to-left). Additionally,
+/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
+/// for. Control characters that are technically bidi controls but do not
+/// cause right-to-left behavior without the presence of right-to-left
+/// characters or right-to-left controls are not checked for. As a special
+/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
 ///
 /// Returns `true` if the input is invalid UTF-8 or the input contains an
 /// RTL character. Returns `false` if the input is valid UTF-8 and contains
@ -699,15 +702,15 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
    // U+202E: E2 80 AE
    // U+2067: E2 81 A7
    //
-    // U+FB4F: EF AD 8F
-    // U+FB50: EF AD 90
+    // U+FB1C: EF AC 9C
+    // U+FB1D: EF AC 9D
    // U+FDFF: EF B7 BF
    // U+FE00: EF B8 80
    //
    // U+FE6F: EF B9 AF
    // U+FE70: EF B9 B0
+    // U+FEFE: EF BB BE
    // U+FEFF: EF BB BF
-    // U+FF00: EF BC 80
    //
    // U+107FF: F0 90 9F BF
    // U+10800: F0 90 A0 80
@ -797,9 +800,9 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
                            {
                                return true;
                            }
-                            if in_inclusive_range8(second, 0xAD, 0xB7) {
-                                if second == 0xAD {
-                                    if third > 0x8F {
+                            if in_inclusive_range8(second, 0xAC, 0xB7) {
+                                if second == 0xAC {
+                                    if third > 0x9C {
                                        return true;
                                    }
                                } else {
@ -810,6 +813,10 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
                                    if third > 0xAF {
                                        return true;
                                    }
+                                } else if second == 0xBB {
+                                    if third != 0xBF {
+                                        return true;
+                                    }
                                } else {
                                    return true;
                                }
@ -1013,9 +1020,9 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
                    {
                        return true;
                    }
-                    if in_inclusive_range8(second, 0xAD, 0xB7) {
-                        if second == 0xAD {
-                            if third > 0x8F {
+                    if in_inclusive_range8(second, 0xAC, 0xB7) {
+                        if second == 0xAC {
+                            if third > 0x9C {
                                return true;
                            }
                        } else {
@ -1026,6 +1033,10 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
                            if third > 0xAF {
                                return true;
                            }
+                        } else if second == 0xBB {
+                            if third != 0xBF {
+                                return true;
+                            }
                        } else {
                            return true;
                        }
@ -1083,11 +1094,14 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
 /// right-to-left processing.
 ///
 /// The check is done on a Unicode block basis without regard to assigned
-/// vs. unassigned code points in the block. Additionally, the four
-/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
-/// Control characters that are technically bidi controls but do not cause
-/// right-to-left behavior without the presence of right-to-left characters
-/// or right-to-left controls are not checked for.
+/// vs. unassigned code points in the block. Hebrew presentation forms in
+/// the Alphabetic Presentation Forms block are treated as if they formed
+/// a block on their own (i.e. they are treated as right-to-left). Additionally,
+/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
+/// for. Control characters that are technically bidi controls but do not
+/// cause right-to-left behavior without the presence of right-to-left
+/// characters or right-to-left controls are not checked for. As a special
+/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
 #[inline]
 pub fn is_str_bidi(buffer: &str) -> bool {
    // U+058F: D6 8F
@ -1100,15 +1114,15 @@ pub fn is_str_bidi(buffer: &str) -> bool {
    // U+202E: E2 80 AE
    // U+2067: E2 81 A7
    //
-    // U+FB4F: EF AD 8F
-    // U+FB50: EF AD 90
+    // U+FB1C: EF AC 9C
+    // U+FB1D: EF AC 9D
    // U+FDFF: EF B7 BF
    // U+FE00: EF B8 80
    //
    // U+FE6F: EF B9 AF
    // U+FE70: EF B9 B0
+    // U+FEFE: EF BB BE
    // U+FEFF: EF BB BF
-    // U+FF00: EF BC 80
    //
    // U+107FF: F0 90 9F BF
    // U+10800: F0 90 A0 80
@ -1178,10 +1192,10 @@ pub fn is_str_bidi(buffer: &str) -> bool {
                            }
                        } else {
                            debug_assert_eq!(byte, 0xEF);
-                            if in_inclusive_range8(second, 0xAD, 0xB7) {
-                                if second == 0xAD {
+                            if in_inclusive_range8(second, 0xAC, 0xB7) {
+                                if second == 0xAC {
                                    let third = bytes[read + 2];
-                                    if third > 0x8F {
+                                    if third > 0x9C {
                                        return true;
                                    }
                                } else {
@ -1193,6 +1207,11 @@ pub fn is_str_bidi(buffer: &str) -> bool {
                                    if third > 0xAF {
                                        return true;
                                    }
+                                } else if second == 0xBB {
+                                    let third = bytes[read + 2];
+                                    if third != 0xBF {
+                                        return true;
+                                    }
                                } else {
                                    return true;
                                }
@ -1230,11 +1249,14 @@ pub fn is_str_bidi(buffer: &str) -> bool {
 /// right-to-left processing.
 ///
 /// The check is done on a Unicode block basis without regard to assigned
-/// vs. unassigned code points in the block. Additionally, the four
-/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
-/// Control characters that are technically bidi controls but do not cause
-/// right-to-left behavior without the presence of right-to-left characters
-/// or right-to-left controls are not checked for.
+/// vs. unassigned code points in the block. Hebrew presentation forms in
+/// the Alphabetic Presentation Forms block are treated as if they formed
+/// a block on their own (i.e. they are treated as right-to-left). Additionally,
+/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
+/// for. Control characters that are technically bidi controls but do not
+/// cause right-to-left behavior without the presence of right-to-left
+/// characters or right-to-left controls are not checked for. As a special
+/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
 ///
 /// Returns `true` if the input contains an RTL character or an unpaired
 /// high surrogate that could be the high half of an RTL character.
@ -1248,11 +1270,14 @@ pub fn is_utf16_bidi(buffer: &[u16]) -> bool {
 /// Checks whether a code point triggers right-to-left processing.
 ///
 /// The check is done on a Unicode block basis without regard to assigned
-/// vs. unassigned code points in the block. Additionally, the four
-/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
-/// Control characters that are technically bidi controls but do not cause
-/// right-to-left behavior without the presence of right-to-left characters
-/// or right-to-left controls are not checked for.
+/// vs. unassigned code points in the block. Hebrew presentation forms in
+/// the Alphabetic Presentation Forms block are treated as if they formed
+/// a block on their own (i.e. they are treated as right-to-left). Additionally,
+/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
+/// for. Control characters that are technically bidi controls but do not
+/// cause right-to-left behavior without the presence of right-to-left
+/// characters or right-to-left controls are not checked for. As a special
+/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
 #[inline(always)]
 pub fn is_char_bidi(c: char) -> bool {
    // Controls:
@ -1266,8 +1291,9 @@ pub fn is_char_bidi(c: char) -> bool {
    // BMP RTL:
    // https://www.unicode.org/roadmaps/bmp/
    // U+0590...U+08FF
-    // U+FB50...U+FDFF Arabic Presentation Forms A
-    // U+FE70...U+FEFF Arabic Presentation Forms B
+    // U+FB1D...U+FDFF Hebrew presentation forms and
+    //                 Arabic Presentation Forms A
+    // U+FE70...U+FEFE Arabic Presentation Forms B (excl. BOM)
    //
    // Supplementary RTL:
    // https://www.unicode.org/roadmaps/smp/
@ -1278,8 +1304,8 @@ pub fn is_char_bidi(c: char) -> bool {
        // Below Hebrew
        return false;
    }
-    if in_range32(code_point, 0x0900, 0xFB50) {
-        // Above Arabic Extended-A and below Arabic Presentation Forms
+    if in_range32(code_point, 0x0900, 0xFB1D) {
+        // Above Arabic Extended-A and below Hebrew presentation forms
        if in_inclusive_range32(code_point, 0x200F, 0x2067) {
            // In the range that contains the RTL controls
            return code_point == 0x200F
@ -1297,8 +1323,8 @@ pub fn is_char_bidi(c: char) -> bool {
        // Between astral RTL blocks
        return false;
    }
-    if in_range32(code_point, 0xFF00, 0x10800) {
-        // Above Arabic Presentations Forms B and below first
+    if in_range32(code_point, 0xFEFF, 0x10800) {
+        // Above Arabic Presentation Forms B (excl. BOM) and below first
        // astral RTL
        return false;
    }
@ -1312,11 +1338,14 @@ pub fn is_char_bidi(c: char) -> bool {
 /// Checks whether a UTF-16 code unit triggers right-to-left processing.
 ///
 /// The check is done on a Unicode block basis without regard to assigned
-/// vs. unassigned code points in the block. Additionally, the four
-/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
-/// Control characters that are technically bidi controls but do not cause
-/// right-to-left behavior without the presence of right-to-left characters
-/// or right-to-left controls are not checked for.
+/// vs. unassigned code points in the block. Hebrew presentation forms in
+/// the Alphabetic Presentation Forms block are treated as if they formed
+/// a block on their own (i.e. they are treated as right-to-left). Additionally,
+/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
+/// for. Control characters that are technically bidi controls but do not
+/// cause right-to-left behavior without the presence of right-to-left
+/// characters or right-to-left controls are not checked for. As a special
+/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
 ///
 /// Since supplementary-plane right-to-left blocks are identifiable from the
 /// high surrogate without examining the low surrogate, this function returns
@ -1338,8 +1367,8 @@ pub fn is_utf16_code_unit_bidi(u: u16) -> bool {
        }
        return false;
    }
-    if in_range16(u, 0xD83C, 0xFB50) {
-        // Between astral RTL high surrogates and Arabic Presentation Forms
+    if in_range16(u, 0xD83C, 0xFB1D) {
+        // Between astral RTL high surrogates and Hebrew presentation forms
        // (Emoji is here)
        return false;
    }
@ -1347,8 +1376,8 @@ pub fn is_utf16_code_unit_bidi(u: u16) -> bool {
        // Between RTL high surragates
        return false;
    }
-    if u > 0xFEFF {
-        // Above Arabic Presentation Forms
+    if u > 0xFEFE {
+        // Above Arabic Presentation Forms (excl. BOM)
        return false;
    }
    if in_range16(u, 0xFE00, 0xFE70) {
@ -2370,13 +2399,14 @@ mod tests {
        assert!(!is_char_bidi('\u{1F4A9}'));
        assert!(!is_char_bidi('\u{FE00}'));
        assert!(!is_char_bidi('\u{202C}'));
+        assert!(!is_char_bidi('\u{FEFF}'));
        assert!(is_char_bidi('\u{0590}'));
        assert!(is_char_bidi('\u{08FF}'));
        assert!(is_char_bidi('\u{061C}'));
        assert!(is_char_bidi('\u{FB50}'));
        assert!(is_char_bidi('\u{FDFF}'));
        assert!(is_char_bidi('\u{FE70}'));
-        assert!(is_char_bidi('\u{FEFF}'));
+        assert!(is_char_bidi('\u{FEFE}'));
        assert!(is_char_bidi('\u{200F}'));
        assert!(is_char_bidi('\u{202B}'));
        assert!(is_char_bidi('\u{202E}'));
@ -2395,13 +2425,15 @@ mod tests {
        assert!(!is_utf16_code_unit_bidi(0xD801));
        assert!(!is_utf16_code_unit_bidi(0xFE00));
        assert!(!is_utf16_code_unit_bidi(0x202C));
+        assert!(!is_utf16_code_unit_bidi(0xFEFF));
        assert!(is_utf16_code_unit_bidi(0x0590));
        assert!(is_utf16_code_unit_bidi(0x08FF));
        assert!(is_utf16_code_unit_bidi(0x061C));
+        assert!(is_utf16_code_unit_bidi(0xFB1D));
        assert!(is_utf16_code_unit_bidi(0xFB50));
        assert!(is_utf16_code_unit_bidi(0xFDFF));
        assert!(is_utf16_code_unit_bidi(0xFE70));
-        assert!(is_utf16_code_unit_bidi(0xFEFF));
+        assert!(is_utf16_code_unit_bidi(0xFEFE));
        assert!(is_utf16_code_unit_bidi(0x200F));
        assert!(is_utf16_code_unit_bidi(0x202B));
        assert!(is_utf16_code_unit_bidi(0x202E));
@ -2420,13 +2452,14 @@ mod tests {
        assert!(!is_str_bidi("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop"));
        assert!(!is_str_bidi("abcdefghijklmnop\u{FE00}abcdefghijklmnop"));
        assert!(!is_str_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop"));
+        assert!(!is_str_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"));
        assert!(is_str_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop"));
        assert!(is_str_bidi("abcdefghijklmnop\u{08FF}abcdefghijklmnop"));
        assert!(is_str_bidi("abcdefghijklmnop\u{061C}abcdefghijklmnop"));
        assert!(is_str_bidi("abcdefghijklmnop\u{FB50}abcdefghijklmnop"));
        assert!(is_str_bidi("abcdefghijklmnop\u{FDFF}abcdefghijklmnop"));
        assert!(is_str_bidi("abcdefghijklmnop\u{FE70}abcdefghijklmnop"));
-        assert!(is_str_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{FEFE}abcdefghijklmnop"));
        assert!(is_str_bidi("abcdefghijklmnop\u{200F}abcdefghijklmnop"));
        assert!(is_str_bidi("abcdefghijklmnop\u{202B}abcdefghijklmnop"));
        assert!(is_str_bidi("abcdefghijklmnop\u{202E}abcdefghijklmnop"));
@ -2457,6 +2490,9 @@ mod tests {
        assert!(!is_utf8_bidi(
            "abcdefghijklmnop\u{202C}abcdefghijklmnop".as_bytes()
        ));
+        assert!(!is_utf8_bidi(
+            "abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()
+        ));
        assert!(is_utf8_bidi(
            "abcdefghijklmnop\u{0590}abcdefghijklmnop".as_bytes()
        ));
@ -2476,7 +2512,7 @@ mod tests {
            "abcdefghijklmnop\u{FE70}abcdefghijklmnop".as_bytes()
        ));
        assert!(is_utf8_bidi(
-            "abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()
+            "abcdefghijklmnop\u{FEFE}abcdefghijklmnop".as_bytes()
        ));
        assert!(is_utf8_bidi(
            "abcdefghijklmnop\u{200F}abcdefghijklmnop".as_bytes()
@ -2530,6 +2566,10 @@ mod tests {
            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202C, 0x62, 0x63, 0x64, 0x65, 0x66,
            0x67, 0x68, 0x69,
        ]));
+        assert!(!is_utf16_bidi(&[
+            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65, 0x66,
+            0x67, 0x68, 0x69,
+        ]));
        assert!(is_utf16_bidi(&[
            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x62, 0x63, 0x64, 0x65, 0x66,
            0x67, 0x68, 0x69,
@ -2542,6 +2582,10 @@ mod tests {
            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x061C, 0x62, 0x63, 0x64, 0x65, 0x66,
            0x67, 0x68, 0x69,
        ]));
+        assert!(is_utf16_bidi(&[
+            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB1D, 0x62, 0x63, 0x64, 0x65, 0x66,
+            0x67, 0x68, 0x69,
+        ]));
        assert!(is_utf16_bidi(&[
            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB50, 0x62, 0x63, 0x64, 0x65, 0x66,
            0x67, 0x68, 0x69,
@ -2555,7 +2599,7 @@ mod tests {
            0x67, 0x68, 0x69,
        ]));
        assert!(is_utf16_bidi(&[
-            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65, 0x66,
+            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFE, 0x62, 0x63, 0x64, 0x65, 0x66,
            0x67, 0x68, 0x69,
        ]));
        assert!(is_utf16_bidi(&[
@ -2623,6 +2667,10 @@ mod tests {
            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop"),
            Latin1Bidi::Bidi
        );
+        assert_ne!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
        assert_eq!(
            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop"),
            Latin1Bidi::Bidi
@ -2648,7 +2696,7 @@ mod tests {
            Latin1Bidi::Bidi
        );
        assert_eq!(
-            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"),
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFE}abcdefghijklmnop"),
            Latin1Bidi::Bidi
        );
        assert_eq!(
@ -2711,6 +2759,10 @@ mod tests {
            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop".as_bytes()),
            Latin1Bidi::Bidi
        );
+        assert_ne!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
        assert_eq!(
            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop".as_bytes()),
            Latin1Bidi::Bidi
@ -2736,7 +2788,7 @@ mod tests {
            Latin1Bidi::Bidi
        );
        assert_eq!(
-            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()),
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FEFE}abcdefghijklmnop".as_bytes()),
            Latin1Bidi::Bidi
        );
        assert_eq!(
@ -2817,6 +2869,13 @@ mod tests {
            ]),
            Latin1Bidi::Bidi
        );
+        assert_ne!(
+            check_utf16_for_latin1_and_bidi(&[
+                0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65,
+                0x66, 0x67, 0x68, 0x69,
+            ]),
+            Latin1Bidi::Bidi
+        );
        assert_eq!(
            check_utf16_for_latin1_and_bidi(&[
                0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x62, 0x63, 0x64, 0x65,
@ -2838,6 +2897,13 @@ mod tests {
            ]),
            Latin1Bidi::Bidi
        );
+        assert_eq!(
+            check_utf16_for_latin1_and_bidi(&[
+                0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB1D, 0x62, 0x63, 0x64, 0x65,
+                0x66, 0x67, 0x68, 0x69,
+            ]),
+            Latin1Bidi::Bidi
+        );
        assert_eq!(
            check_utf16_for_latin1_and_bidi(&[
                0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB50, 0x62, 0x63, 0x64, 0x65,
@ -2861,7 +2927,7 @@ mod tests {
        );
        assert_eq!(
            check_utf16_for_latin1_and_bidi(&[
-                0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65,
+                0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFE, 0x62, 0x63, 0x64, 0x65,
                0x66, 0x67, 0x68, 0x69,
            ]),
            Latin1Bidi::Bidi
@ -2936,8 +3002,8 @@ mod tests {
    pub fn reference_is_char_bidi(c: char) -> bool {
        match c {
            '\u{0590}'...'\u{08FF}'
-            | '\u{FB50}'...'\u{FDFF}'
-            | '\u{FE70}'...'\u{FEFF}'
+            | '\u{FB1D}'...'\u{FDFF}'
+            | '\u{FE70}'...'\u{FEFE}'
            | '\u{10800}'...'\u{10FFF}'
            | '\u{1E800}'...'\u{1EFFF}'
            | '\u{200F}'
@ -2952,8 +3018,8 @@ mod tests {
    pub fn reference_is_utf16_code_unit_bidi(u: u16) -> bool {
        match u {
            0x0590...0x08FF
-            | 0xFB50...0xFDFF
-            | 0xFE70...0xFEFF
+            | 0xFB1D...0xFDFF
+            | 0xFE70...0xFEFE
            | 0xD802
            | 0xD803
            | 0xD83A
@ -3049,6 +3115,19 @@ mod tests {
        }
    }

+    #[test]
+    fn test_is_utf16_bidi_thoroughly() { // exhaustive: compares is_utf16_bidi() against the reference for every u16
+        let mut buf = [0; 32]; // 32 zeroed code units; only index 15 is varied below
+        for i in 0..0x10000u32 { // u32 counter so the range can reach 0xFFFF inclusive
+            let u = i as u16; // lossless: i is always below 0x10000
+            buf[15] = u;
+            assert_eq!(
+                is_utf16_bidi(&buf[..]), // slice-level verdict with u as the only varied unit
+                reference_is_utf16_code_unit_bidi(u) // expected verdict from the reference match
+            );
+        }
+    }
+
    #[test]
    fn test_is_utf8_bidi_edge_cases() {
        assert!(!is_utf8_bidi(b"\xD5\xBF\x61"));
--- a/third_party/rust/encoding_rs/src/simd_funcs.rs
+++ b/third_party/rust/encoding_rs/src/simd_funcs.rs
@ -278,8 +278,8 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool {
    // Quick refutation failed. Let's do the full check.

    (in_range16x8!(s, 0x0590, 0x0900)
-        | in_range16x8!(s, 0xFB50, 0xFE00)
-        | in_range16x8!(s, 0xFE70, 0xFF00)
+        | in_range16x8!(s, 0xFB1D, 0xFE00)
+        | in_range16x8!(s, 0xFE70, 0xFEFF)
        | in_range16x8!(s, 0xD802, 0xD804)
        | in_range16x8!(s, 0xD83A, 0xD83C)
        | s.eq(u16x8::splat(0x200F))
--- a/third_party/rust/encoding_rs/src/utf_16.rs
+++ b/third_party/rust/encoding_rs/src/utf_16.rs
@ -401,4 +401,58 @@ mod tests {
            assert_eq!(output[0], 0xFFFD);
        }
    }
+
+    #[test]
+    fn test_utf_16le_decode_near_end() { // UTF-16LE code unit split across two decode_to_utf8 calls, 4-byte output
+        let mut output = [0u8; 4]; // zero-filled so untouched bytes can be asserted as 0x00
+        let mut decoder = UTF_16LE.new_decoder();
+        { // first call: only the first byte (0x03) of a little-endian code unit is supplied
+            let (result, read, written, had_errors) =
+                decoder.decode_to_utf8(&[0x03], &mut output[..], false);
+            assert_eq!(result, CoderResult::InputEmpty);
+            assert_eq!(read, 1);
+            assert_eq!(written, 0); // nothing is emitted for the lone byte
+            assert!(!had_errors);
+            assert_eq!(output[0], 0x0);
+        }
+        { // second call: 0x26 completes code unit 0x2603; two more input bytes follow it
+            let (result, read, written, had_errors) =
+                decoder.decode_to_utf8(&[0x26, 0x03, 0x26], &mut output[..], false);
+            assert_eq!(result, CoderResult::OutputFull);
+            assert_eq!(read, 1); // only the completing byte is expected to be consumed
+            assert_eq!(written, 3);
+            assert!(!had_errors);
+            assert_eq!(output[0], 0xE2); // E2 98 83 is the UTF-8 encoding of U+2603
+            assert_eq!(output[1], 0x98);
+            assert_eq!(output[2], 0x83);
+            assert_eq!(output[3], 0x00); // fourth output byte must remain untouched
+        }
+    }
+
+    #[test]
+    fn test_utf_16be_decode_near_end() { // UTF-16BE code unit split across two decode_to_utf8 calls, 4-byte output
+        let mut output = [0u8; 4]; // zero-filled so untouched bytes can be asserted as 0x00
+        let mut decoder = UTF_16BE.new_decoder();
+        { // first call: only the first byte (0x26) of a big-endian code unit is supplied
+            let (result, read, written, had_errors) =
+                decoder.decode_to_utf8(&[0x26], &mut output[..], false);
+            assert_eq!(result, CoderResult::InputEmpty);
+            assert_eq!(read, 1);
+            assert_eq!(written, 0); // nothing is emitted for the lone byte
+            assert!(!had_errors);
+            assert_eq!(output[0], 0x0);
+        }
+        { // second call: 0x03 completes code unit 0x2603; two more input bytes follow it
+            let (result, read, written, had_errors) =
+                decoder.decode_to_utf8(&[0x03, 0x26, 0x03], &mut output[..], false);
+            assert_eq!(result, CoderResult::OutputFull);
+            assert_eq!(read, 1); // only the completing byte is expected to be consumed
+            assert_eq!(written, 3);
+            assert!(!had_errors);
+            assert_eq!(output[0], 0xE2); // E2 98 83 is the UTF-8 encoding of U+2603
+            assert_eq!(output[1], 0x98);
+            assert_eq!(output[2], 0x83);
+            assert_eq!(output[3], 0x00); // fourth output byte must remain untouched
+        }
+    }
 }