Bug 1773399 - Update idna to 0.2.3. r=emilio,supply-chain-reviewers

Differential Revision: https://phabricator.services.mozilla.com/D148737
Mike Hommey 2022-06-14 20:32:28 +00:00
Parent 3f5dc34b19
Commit 527752ca63
32 changed files: 29206 additions and 145914 deletions

Cargo.lock (generated)

@ -2621,9 +2621,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "0.2.1"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de910d521f7cc3135c4de8db1cb910e0b5ed1dc6f57c381cd07e8e661ce10094"
checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8"
dependencies = [
"matches",
"unicode-bidi",
@ -5279,6 +5279,13 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29738eedb4388d9ea620eeab9384884fc3f06f586a2eddb56bedc5885126c7c1"
[[package]]
name = "tinyvec"
version = "1.999.999"
dependencies = [
"smallvec",
]
[[package]]
name = "to_shmem"
version = "0.0.1"
@ -5636,9 +5643,12 @@ checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee"
[[package]]
name = "unicode-normalization"
version = "0.1.7"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25"
checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"
dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-segmentation"


@ -132,6 +132,9 @@ nom = { path = "build/rust/nom" }
# dependencies on windows-sys.
parking_lot = { path = "build/rust/parking_lot" }
# Override tinyvec with smallvec
tinyvec = { path = "build/rust/tinyvec" }
# Patch autocfg to hide rustc output. Workaround for https://github.com/cuviper/autocfg/issues/30
autocfg = { path = "third_party/rust/autocfg" }


@ -0,0 +1,16 @@
[package]
name = "tinyvec"
version = "1.999.999"
edition = "2018"
license = "MPL-2.0"
[lib]
path = "lib.rs"
[dependencies]
smallvec = "1"
[features]
alloc = []
default = []
std = ["alloc"]


@ -0,0 +1,6 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
pub use smallvec::SmallVec as ArrayVec;
pub use smallvec::SmallVec as TinyVec;
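The shim above simply re-exports smallvec's `SmallVec` under the two tinyvec names, so the vendored unicode-normalization code that was written against tinyvec keeps compiling while actually using smallvec. A minimal sketch of the API surface this relies on (my own illustration, not part of the patch; it only uses methods `SmallVec` provides, such as `new`, `push`, `truncate`, `len`, and slice access via `Deref`):

```rust
// Illustrative only: with the shim, `tinyvec::TinyVec` resolves to
// `smallvec::SmallVec`, so all of these calls go straight to smallvec.
fn main() {
    let mut buf: tinyvec::TinyVec<[(u8, char); 4]> = tinyvec::TinyVec::new();
    buf.push((230, 'a')); // (combining class, char) pairs, as in decompose.rs
    buf.push((0, 'b'));
    buf.sort_by_key(|k| k.0); // works through DerefMut to a mutable slice
    buf.truncate(1);
    assert_eq!(buf.len(), 1);
}
```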


@ -768,7 +768,7 @@ version = "1.0.1"
criteria = "safe-to-deploy"
[[unaudited.idna]]
version = "0.2.1"
version = "0.2.3"
criteria = "safe-to-deploy"
[[unaudited.indexmap]]
@ -1552,7 +1552,7 @@ version = "1.0.0"
criteria = "safe-to-deploy"
[[unaudited.unicode-normalization]]
version = "0.1.7"
version = "0.1.19"
criteria = "safe-to-deploy"
[[unaudited.unicode-segmentation]]

third_party/rust/idna/.cargo-checksum.json (vendored)

@ -1 +1 @@
{"files":{"Cargo.toml":"6f1fd46d4d9575d5a7f46873cb40a93e973e9fb8f574b28a1b21b596df618a89","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"20c7855c364d57ea4c97889a5e8d98470a9952dade37bd9248b9a54431670e5e","benches/all.rs":"e734b9c9092ed66986725f86cfe90f3756cfddb058af308b796ba494f9beefc2","src/IdnaMappingTable.txt":"87d6553a4b86bc49dcade38bf26b745cd81800eb8af295dc3fb99b4729eaea38","src/lib.rs":"d61b2bfcf4265b9a41eedd1de33ab49ea615e3c06df944321b30c57950a85342","src/make_uts46_mapping_table.py":"d420883d17b44c42109317ffaf1c273e611864eaeb1c5f1b9d93634a5d586835","src/punycode.rs":"dceeb0467197f892d2c777711b3c6647238f52f3976dfca5a8f8957500fd3599","src/uts46.rs":"49aaae3c5a9503bc7ef59b1a2e76ba158154132515e7c85ab670130ed5da318f","src/uts46_mapping_table.rs":"90c4180dd865b919bf1b2f13459c9c5b9de0cbbdff6584f742a7ecc0c14d3cdd","tests/IdnaTestV2.txt":"c6f3778b0545fd150c8063286c7f5adc901e16557eddccc3751213646d07593d","tests/punycode.rs":"8efdaae0902a8ffe483ae69236c9d0a38979cfd2430e69b87f33975e6946d577","tests/punycode_tests.json":"3d4ac0cf25984c37b9ce197f5df680a0136f728fb8ec82bc76624e42139eb3a8","tests/tests.rs":"de7425a3e4e6e871255721107803704d1431246601fa9c87105224d88dfe60d6","tests/unit.rs":"9600ec4f67ae44e8457fb64a9872d190a1a4d807e32d9688c8fa3ef9135c7a5d","tests/uts46.rs":"ca91d48811d366fb9e32d7aa79cfda1261b93c271b6ed7fb5535de9a2500205b"},"package":"de910d521f7cc3135c4de8db1cb910e0b5ed1dc6f57c381cd07e8e661ce10094"}
{"files":{"Cargo.toml":"fa141dcb135262e5fda9f680671699045326d96779bb1acf38d48c70c712bcdf","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"20c7855c364d57ea4c97889a5e8d98470a9952dade37bd9248b9a54431670e5e","benches/all.rs":"e734b9c9092ed66986725f86cfe90f3756cfddb058af308b796ba494f9beefc2","src/IdnaMappingTable.txt":"87d6553a4b86bc49dcade38bf26b745cd81800eb8af295dc3fb99b4729eaea38","src/lib.rs":"d61b2bfcf4265b9a41eedd1de33ab49ea615e3c06df944321b30c57950a85342","src/make_uts46_mapping_table.py":"917055fa841f813de2bcf79cc79b595da3d5551559ee768db8660ab77cb26c34","src/punycode.rs":"07edf5293bc384a164eebb01bc18fe3d4b2d009b4565a36b74a3030978ea6e04","src/uts46.rs":"40521a01e5b8c38667252d5b1e0141c5a71f63aeae2f451b986792984e633b09","src/uts46_mapping_table.rs":"942fff78147c61da942f5f3a7ff4e90f9d7a00a29285733ac3fc3357eb2ed06f","tests/IdnaTestV2.txt":"c6f3778b0545fd150c8063286c7f5adc901e16557eddccc3751213646d07593d","tests/punycode.rs":"e6fb978f48445d1525a6b97351c41c5393a1612a35f85b9a7f45b8794fce9aba","tests/punycode_tests.json":"3d4ac0cf25984c37b9ce197f5df680a0136f728fb8ec82bc76624e42139eb3a8","tests/tests.rs":"de7425a3e4e6e871255721107803704d1431246601fa9c87105224d88dfe60d6","tests/unit.rs":"be025a7d9bab3bd1ce134c87f9d848269e157b31ca5ba0ea03426c1ac736b69e","tests/uts46.rs":"ca91d48811d366fb9e32d7aa79cfda1261b93c271b6ed7fb5535de9a2500205b"},"package":"418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8"}

third_party/rust/idna/Cargo.toml (vendored)

@ -13,7 +13,7 @@
[package]
edition = "2018"
name = "idna"
version = "0.2.1"
version = "0.2.3"
authors = ["The rust-url developers"]
autotests = false
description = "IDNA (Internationalizing Domain Names in Applications) and Punycode."
@ -40,7 +40,7 @@ version = "0.1"
version = "0.3"
[dependencies.unicode-normalization]
version = "0.1.5"
version = "0.1.17"
[dev-dependencies.assert_matches]
version = "1.3"


@ -78,6 +78,12 @@ for line in txt:
unicode_str = u''.join(char(c) for c in fields[2].strip().split(' '))
elif mapping == "Deviation":
unicode_str = u''
if len(fields) > 3:
assert fields[3].strip() in ('NV8', 'XV8'), fields[3]
assert mapping == 'Valid', mapping
mapping = 'DisallowedIdna2008'
ranges.append((first, last, mapping, unicode_str))
def mergeable_key(r):
@ -86,7 +92,7 @@ def mergeable_key(r):
# These types have associated data, so we should not merge them.
if mapping in ('Mapped', 'Deviation', 'DisallowedStd3Mapped'):
return r
assert mapping in ('Valid', 'Ignored', 'Disallowed', 'DisallowedStd3Valid')
assert mapping in ('Valid', 'Ignored', 'Disallowed', 'DisallowedStd3Valid', 'DisallowedIdna2008')
return mapping
grouped_ranges = itertools.groupby(ranges, key=mergeable_key)
@ -116,11 +122,7 @@ for (k, g) in grouped_ranges:
# Assert we're seeing the surrogate case here.
assert last_char == 0xd7ff
assert next_char == 0xe000
first = group[0][0]
last = group[-1][1]
mapping = group[0][2]
unicode_str = group[0][3]
optimized_ranges.append((first, last, mapping, unicode_str))
optimized_ranges.append((group[0][0], group[-1][1]) + group[0][2:])
def is_single_char_range(r):
(first, last, _, _) = r
@ -148,30 +150,22 @@ def merge_single_char_ranges(ranges):
optimized_ranges = list(merge_single_char_ranges(optimized_ranges))
print("static TABLE: &[Range] = &[")
for ranges in optimized_ranges:
first = ranges[0][0]
last = ranges[-1][1]
print(" Range { from: '%s', to: '%s', }," % (escape_char(char(first)),
escape_char(char(last))))
print("];\n")
print("static INDEX_TABLE: &[u16] = &[")
SINGLE_MARKER = 1 << 15
print("static TABLE: &[(char, u16)] = &[")
offset = 0
for ranges in optimized_ranges:
assert offset < SINGLE_MARKER
block_len = len(ranges)
single = SINGLE_MARKER if block_len == 1 else 0
print(" %s," % (offset | single))
index = offset | single
offset += block_len
start = escape_char(char(ranges[0][0]))
print(" ('%s', %s)," % (start, index))
print("];\n")
print("static MAPPING_TABLE: &[Mapping] = &[")

third_party/rust/idna/src/punycode.rs (vendored)

@ -78,6 +78,10 @@ impl Decoder {
),
};
if !base.is_ascii() {
return Err(());
}
let base_len = base.len();
let mut length = base_len as u32;
let mut code_point = INITIAL_N;

third_party/rust/idna/src/uts46.rs (vendored)

@ -11,7 +11,6 @@
use self::Mapping::*;
use crate::punycode;
use std::cmp::Ordering::{Equal, Greater, Less};
use std::{error::Error as StdError, fmt};
use unicode_bidi::{bidi_class, BidiClass};
use unicode_normalization::char::is_combining_mark;
@ -48,38 +47,26 @@ enum Mapping {
Disallowed,
DisallowedStd3Valid,
DisallowedStd3Mapped(StringTableSlice),
}
struct Range {
from: char,
to: char,
DisallowedIdna2008,
}
fn find_char(codepoint: char) -> &'static Mapping {
let r = TABLE.binary_search_by(|ref range| {
if codepoint > range.to {
Less
} else if codepoint < range.from {
Greater
} else {
Equal
}
});
r.ok()
.map(|i| {
const SINGLE_MARKER: u16 = 1 << 15;
let idx = match TABLE.binary_search_by_key(&codepoint, |&val| val.0) {
Ok(idx) => idx,
Err(idx) => idx - 1,
};
let x = INDEX_TABLE[i];
let single = (x & SINGLE_MARKER) != 0;
let offset = !SINGLE_MARKER & x;
const SINGLE_MARKER: u16 = 1 << 15;
if single {
&MAPPING_TABLE[offset as usize]
} else {
&MAPPING_TABLE[(offset + (codepoint as u16 - TABLE[i].from as u16)) as usize]
}
})
.unwrap()
let (base, x) = TABLE[idx];
let single = (x & SINGLE_MARKER) != 0;
let offset = !SINGLE_MARKER & x;
if single {
&MAPPING_TABLE[offset as usize]
} else {
&MAPPING_TABLE[(offset + (codepoint as u16 - base as u16)) as usize]
}
}
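For reference, a toy sketch of the lookup scheme the new `find_char` uses (hypothetical data, not the generated uts46 table): each `TABLE` entry pairs a block's first codepoint with a `u16` index whose high bit marks a block that shares a single `Mapping` entry and whose low 15 bits are the offset into `MAPPING_TABLE`.

```rust
const SINGLE_MARKER: u16 = 1 << 15;

// Made-up blocks: codepoints from 'a' map per-character starting at offset 0;
// codepoints from 'f' form one block sharing the single mapping at offset 5.
static TOY_TABLE: &[(char, u16)] = &[('a', 0), ('f', 5 | SINGLE_MARKER)];

fn toy_mapping_index(codepoint: char) -> usize {
    // Find the last block whose start is <= codepoint (assumes codepoint >= 'a').
    let idx = match TOY_TABLE.binary_search_by_key(&codepoint, |&(start, _)| start) {
        Ok(idx) => idx,
        Err(idx) => idx - 1,
    };
    let (base, packed) = TOY_TABLE[idx];
    let offset = (packed & !SINGLE_MARKER) as usize;
    if packed & SINGLE_MARKER != 0 {
        offset
    } else {
        offset + (codepoint as usize - base as usize)
    }
}
```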
struct Mapper<'a> {
@ -140,6 +127,12 @@ impl<'a> Iterator for Mapper<'a> {
self.slice = Some(decode_slice(slice).chars());
continue;
}
Mapping::DisallowedIdna2008 => {
if self.config.use_idna_2008_rules {
self.errors.disallowed_in_idna_2008 = true;
}
codepoint
}
});
}
}
@ -310,13 +303,12 @@ fn check_validity(label: &str, config: Config, errors: &mut Errors) {
// V6: Check against Mapping Table
if label.chars().any(|c| match *find_char(c) {
Mapping::Valid => false,
Mapping::Valid | Mapping::DisallowedIdna2008 => false,
Mapping::Deviation(_) => config.transitional_processing,
Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules,
_ => true,
}) {
errors.invalid_mapping = true;
return;
}
// V7: ContextJ rules
@ -510,6 +502,7 @@ pub struct Config {
transitional_processing: bool,
verify_dns_length: bool,
check_hyphens: bool,
use_idna_2008_rules: bool,
}
/// The defaults are that of https://url.spec.whatwg.org/#idna
@ -524,6 +517,7 @@ impl Default for Config {
// Only use for to_ascii, not to_unicode
verify_dns_length: false,
use_idna_2008_rules: false,
}
}
}
@ -553,6 +547,12 @@ impl Config {
self
}
#[inline]
pub fn use_idna_2008_rules(mut self, value: bool) -> Self {
self.use_idna_2008_rules = value;
self
}
/// http://www.unicode.org/reports/tr46/#ToASCII
pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
let mut result = String::new();
@ -599,6 +599,7 @@ pub struct Errors {
disallowed_character: bool,
too_long_for_dns: bool,
too_short_for_dns: bool,
disallowed_in_idna_2008: bool,
}
impl Errors {
@ -615,6 +616,7 @@ impl Errors {
disallowed_character,
too_long_for_dns,
too_short_for_dns,
disallowed_in_idna_2008,
} = *self;
punycode
|| check_hyphens
@ -627,6 +629,7 @@ impl Errors {
|| disallowed_character
|| too_long_for_dns
|| too_short_for_dns
|| disallowed_in_idna_2008
}
}
@ -644,6 +647,7 @@ impl fmt::Debug for Errors {
disallowed_character,
too_long_for_dns,
too_short_for_dns,
disallowed_in_idna_2008,
} = *self;
let fields = [
@ -661,6 +665,7 @@ impl fmt::Debug for Errors {
("disallowed_character", disallowed_character),
("too_long_for_dns", too_long_for_dns),
("too_short_for_dns", too_short_for_dns),
("disallowed_in_idna_2008", disallowed_in_idna_2008),
];
let mut empty = true;

third_party/rust/idna/src/uts46_mapping_table.rs (vendored)

The diff for this file is not shown because of its size.

third_party/rust/idna/tests/punycode.rs (vendored)

@ -19,10 +19,10 @@ fn one_test(decoded: &str, encoded: &str) {
let result = result.into_iter().collect::<String>();
assert!(
result == decoded,
format!(
"Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n",
encoded, result, decoded
)
"Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n",
encoded,
result,
decoded
)
}
}
@ -31,10 +31,10 @@ fn one_test(decoded: &str, encoded: &str) {
None => panic!("Encoding {} failed.", decoded),
Some(result) => assert!(
result == encoded,
format!(
"Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n",
decoded, result, encoded
)
"Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n",
decoded,
result,
encoded
),
}
}

third_party/rust/idna/tests/unit.rs (vendored)

@ -114,3 +114,26 @@ fn test_v8_bidi_rules() {
// Bidi chars may be punycode-encoded
assert!(config.to_ascii("xn--0ca24w").is_err());
}
#[test]
fn emoji_domains() {
// HOT BEVERAGE is allowed here...
let config = idna::Config::default()
.verify_dns_length(true)
.use_std3_ascii_rules(true);
assert_eq!(config.to_ascii("☕.com").unwrap(), "xn--53h.com");
// ... but not here
let config = idna::Config::default()
.verify_dns_length(true)
.use_std3_ascii_rules(true)
.use_idna_2008_rules(true);
let error = format!("{:?}", config.to_ascii("☕.com").unwrap_err());
assert!(error.contains("disallowed_in_idna_2008"));
}
#[test]
fn unicode_before_delimiter() {
let config = idna::Config::default();
assert!(config.to_ascii("xn--f\u{34a}-PTP").is_err());
}


@ -1 +1 @@
{"files":{"COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"de7af66ede1e1b369adcdf82174fd97782a26cf11d66deb2bdb518741675e15a","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"4e01af0960f3a9abb8a06b64bc903d730a9b285098ec9a1af9bceb135d08a660","benches/bench.rs":"eb8b04b99ac55f8583893ff23385194002472e9b5182e3c74636b989caa163db","scripts/unicode.py":"06e074696ea85b24a82bcad360b7ec765e4bd9ebc574e722689ea2434e8a0548","src/decompose.rs":"7cf48297bfeace89e43e7a0402ca05f4d508c732bf7befddf63ad1e95d14c8c4","src/lib.rs":"5cb3d00fffe5c3cb8f0f1cb4317894946c93247e08f7c612275bfd8948db7a02","src/normalization_tests.rs":"de293b9aa396b1b4235b7bfb460e216e2dc874f4ee58bbf54458173e22363cb1","src/normalize.rs":"82f1a3511432349799b42a360ef4a993a4df7e492d88fdc918adf317317c0ed6","src/quick_check.rs":"73335b915e483604c7d10491bc925fda1bbd29e32ce5dd7529cbe4982034780a","src/recompose.rs":"bf04c41bbcfce4717944f1974b87b97619ba66ca7ebec86745dd53493564e170","src/stream_safe.rs":"18f48fbb6afaa6d75289fe1c473bf9e610e76b3119acf7358b1b12d77b0a85fa","src/tables.rs":"c9c0a7cbdd27c11eb444de215153ba02e08cb9cd485c09855005bf23d30f8502","src/test.rs":"5b51a97954f053c251181277faf7ca8ab8f1a7167104f535fbfad97568442571"},"package":"6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25"}
{"files":{"COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"34370ae727c107ec51fd6809e01ff76220a1bcc2b849b8d277bf9c7bf1875abd","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"80e4415e2f0941aac11b7e5c1db946d00139db2f1a67774fcd0c0bfde52217fe","benches/bench.rs":"827e5343b059a732904be29717c2797203bfd0a633edf08042afea65372a3e2c","scripts/unicode.py":"c00cb48507e4564a2dcf17a95a5fb1206830f748a8444d296f95b5d2dd09b72c","src/__test_api.rs":"78e21bfa0b98894f545c8ed3e31cec20d7a48951a7f3ed69a6130c4b3d463aee","src/decompose.rs":"c0eb774843a545356e63bbcd7fb926f80d3c97ef4601ca3701fc34154f2e9905","src/lib.rs":"3eaa16b8b4d2d8e15d38b56760fb432ec7665e22360fd4c587c9b724486ba90e","src/lookups.rs":"ca7022bf19a82108df1f5bd78c7fc30806f931d932a65538be818caaa5f7049d","src/no_std_prelude.rs":"602e81e67b8952b6571826f431e3b6787be3073bc10f38a0d3374278f81a6a1f","src/normalize.rs":"de2670b4437d335d42884af844a750f70e541467ecd34077dfe032103cb9b041","src/perfect_hash.rs":"400c84e2f467f61bd55d55d08672da6a9ad7a57c938ce5d0c701a6994b1b273b","src/quick_check.rs":"9756312d75fc31b67fca954e44a4812945a7e436b03ba18b9a2441f6de570f6f","src/recompose.rs":"a6228ad7561a5c7a1ef1d510159bdde1eea8a161007c80e470432e9b844d5536","src/replace.rs":"b24c904f3e00851a78820e30ddfa4ff10c795f8925fd0ee7f5870f31fdfa770b","src/stream_safe.rs":"383d71f0da401af8e735877e43855c7e16cb06deb2263539cdec2a407dbe257d","src/tables.rs":"d24cf5a2a6d5059543b39eec6806c93fa8c314b52b251ddd354affcf91ef7f0b","src/test.rs":"0def2cb0a013fba29938262b3cd3533fbb10eacaf6bcd82eef1f91759fe0a2eb"},"package":"d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"}


@ -3,7 +3,7 @@
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g. crates.io) dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
@ -11,14 +11,22 @@
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "unicode-normalization"
version = "0.1.7"
authors = ["kwantam <kwantam@gmail.com>"]
exclude = ["target/*", "Cargo.lock", "scripts/tmp", "*.txt"]
version = "0.1.19"
authors = ["kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>"]
exclude = ["target/*", "Cargo.lock", "scripts/tmp", "*.txt", "tests/*"]
description = "This crate provides functions for normalization of\nUnicode strings, including Canonical and Compatible\nDecomposition and Recomposition, as described in\nUnicode Standard Annex #15.\n"
homepage = "https://github.com/unicode-rs/unicode-normalization"
documentation = "https://unicode-rs.github.io/unicode-normalization"
documentation = "https://docs.rs/unicode-normalization/"
readme = "README.md"
keywords = ["text", "unicode", "normalization", "decomposition", "recomposition"]
license = "MIT/Apache-2.0"
repository = "https://github.com/unicode-rs/unicode-normalization"
[dependencies.tinyvec]
version = "1"
features = ["alloc"]
[features]
default = ["std"]
std = []


@ -1,10 +1,13 @@
# unicode-normalization
[![Build Status](https://travis-ci.org/unicode-rs/unicode-normalization.svg)](https://travis-ci.org/unicode-rs/unicode-normalization)
[![Docs](https://docs.rs/unicode-normalization/badge.svg)](https://docs.rs/unicode-normalization/)
Unicode character composition and decomposition utilities
as described in
[Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
[![Build Status](https://travis-ci.org/unicode-rs/unicode-normalization.svg)](https://travis-ci.org/unicode-rs/unicode-normalization)
[Documentation](https://unicode-rs.github.io/unicode-normalization/unicode_normalization/index.html)
This crate requires Rust 1.36+.
```rust
extern crate unicode_normalization;
@ -21,12 +24,16 @@ fn main() {
}
```
# crates.io
## crates.io
You can use this package in your project by adding the following
to your `Cargo.toml`:
```toml
[dependencies]
unicode-normalization = "0.1.7"
unicode-normalization = "0.1.19"
```
## `no_std` + `alloc` support
This crate is completely `no_std` + `alloc` compatible. This can be enabled by disabling the `std` feature, i.e. specifying `default-features = false` for this crate on your `Cargo.toml`.
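A minimal dependency entry along those lines (illustrative; it only assumes the `std`/`alloc` features declared in this crate's Cargo.toml):

```toml
[dependencies]
unicode-normalization = { version = "0.1.19", default-features = false }
```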


@ -1,8 +1,9 @@
#![feature(test)]
#![feature(iterator_step_by)]
extern crate unicode_normalization;
extern crate test;
extern crate test;
extern crate unicode_normalization;
use std::fs;
use test::Bencher;
use unicode_normalization::UnicodeNormalization;
@ -80,6 +81,40 @@ fn bench_nfd_ascii(b: &mut Bencher) {
b.iter(|| ASCII.nfd().count());
}
#[bench]
fn bench_nfc_long(b: &mut Bencher) {
let long = fs::read_to_string("benches/long.txt").unwrap();
b.iter(|| long.nfc().count());
}
#[bench]
fn bench_nfd_long(b: &mut Bencher) {
let long = fs::read_to_string("benches/long.txt").unwrap();
b.iter(|| long.nfd().count());
}
#[bench]
fn bench_nfkc_ascii(b: &mut Bencher) {
b.iter(|| ASCII.nfkc().count());
}
#[bench]
fn bench_nfkd_ascii(b: &mut Bencher) {
b.iter(|| ASCII.nfkd().count());
}
#[bench]
fn bench_nfkc_long(b: &mut Bencher) {
let long = fs::read_to_string("benches/long.txt").unwrap();
b.iter(|| long.nfkc().count());
}
#[bench]
fn bench_nfkd_long(b: &mut Bencher) {
let long = fs::read_to_string("benches/long.txt").unwrap();
b.iter(|| long.nfkd().count());
}
#[bench]
fn bench_streamsafe_ascii(b: &mut Bencher) {
b.iter(|| ASCII.stream_safe().count());


@ -14,13 +14,14 @@
# - DerivedNormalizationProps.txt
# - NormalizationTest.txt
# - UnicodeData.txt
# - StandardizedVariants.txt
#
# Since this should not require frequent updates, we just store this
# out-of-line and check the unicode.rs file into git.
# out-of-line and check the tables.rs and normalization_tests.rs files into git.
import collections
import requests
import urllib.request
UNICODE_VERSION = "9.0.0"
UNICODE_VERSION = "13.0.0"
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
PREAMBLE = """// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
@ -57,6 +58,11 @@ expanded_categories = {
'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
}
# Constants from Unicode 9.0.0 Section 3.12 Conjoining Jamo Behavior
# http://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#M9.32468.Heading.310.Combining.Jamo.Behavior
S_BASE, L_COUNT, V_COUNT, T_COUNT = 0xAC00, 19, 21, 28
S_COUNT = L_COUNT * V_COUNT * T_COUNT
class UnicodeData(object):
def __init__(self):
self._load_unicode_data()
@ -66,35 +72,48 @@ class UnicodeData(object):
self.canon_comp = self._compute_canonical_comp()
self.canon_fully_decomp, self.compat_fully_decomp = self._compute_fully_decomposed()
self.cjk_compat_variants_fully_decomp = {}
self._load_cjk_compat_ideograph_variants()
def stats(name, table):
count = sum(len(v) for v in table.values())
print "%s: %d chars => %d decomposed chars" % (name, len(table), count)
print("%s: %d chars => %d decomposed chars" % (name, len(table), count))
print "Decomposition table stats:"
print("Decomposition table stats:")
stats("Canonical decomp", self.canon_decomp)
stats("Compatible decomp", self.compat_decomp)
stats("Canonical fully decomp", self.canon_fully_decomp)
stats("Compatible fully decomp", self.compat_fully_decomp)
stats("CJK Compat Variants fully decomp", self.cjk_compat_variants_fully_decomp)
self.ss_leading, self.ss_trailing = self._compute_stream_safe_tables()
def _fetch(self, filename):
resp = requests.get(UCD_URL + filename)
return resp.text
resp = urllib.request.urlopen(UCD_URL + filename)
return resp.read().decode('utf-8')
def _load_unicode_data(self):
self.name_to_char_int = {}
self.combining_classes = {}
self.compat_decomp = {}
self.canon_decomp = {}
self.general_category_mark = []
self.general_category_public_assigned = []
assigned_start = 0;
prev_char_int = -1;
prev_name = "";
for line in self._fetch("UnicodeData.txt").splitlines():
# See ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
pieces = line.split(';')
assert len(pieces) == 15
char, category, cc, decomp = pieces[0], pieces[2], pieces[3], pieces[5]
char, name, category, cc, decomp = pieces[0], pieces[1], pieces[2], pieces[3], pieces[5]
char_int = int(char, 16)
name = pieces[1].strip()
self.name_to_char_int[name] = char_int
if cc != '0':
self.combining_classes[char_int] = cc
@ -106,6 +125,51 @@ class UnicodeData(object):
if category == 'M' or 'M' in expanded_categories.get(category, []):
self.general_category_mark.append(char_int)
assert category != 'Cn', "Unexpected: Unassigned codepoint in UnicodeData.txt"
if category not in ['Co', 'Cs']:
if char_int != prev_char_int + 1 and not is_first_and_last(prev_name, name):
self.general_category_public_assigned.append((assigned_start, prev_char_int))
assigned_start = char_int
prev_char_int = char_int
prev_name = name;
self.general_category_public_assigned.append((assigned_start, prev_char_int))
def _load_cjk_compat_ideograph_variants(self):
for line in self._fetch("StandardizedVariants.txt").splitlines():
strip_comments = line.split('#', 1)[0].strip()
if not strip_comments:
continue
variation_sequence, description, differences = strip_comments.split(';')
description = description.strip()
# Don't use variations that only apply in particular shaping environments.
if differences:
continue
# Look for entries where the description field is a codepoint name.
if description not in self.name_to_char_int:
continue
# Only consider the CJK Compatibility Ideographs.
if not description.startswith('CJK COMPATIBILITY IDEOGRAPH-'):
continue
char_int = self.name_to_char_int[description]
assert not char_int in self.combining_classes, "Unexpected: CJK compat variant with a combining class"
assert not char_int in self.compat_decomp, "Unexpected: CJK compat variant and compatibility decomposition"
assert len(self.canon_decomp[char_int]) == 1, "Unexpected: CJK compat variant and non-singleton canonical decomposition"
# If we ever need to handle Hangul here, we'll need to handle it separately.
assert not (S_BASE <= char_int < S_BASE + S_COUNT)
cjk_compat_variant_parts = [int(c, 16) for c in variation_sequence.split()]
for c in cjk_compat_variant_parts:
assert not c in self.canon_decomp, "Unexpected: CJK compat variant is unnormalized (canon)"
assert not c in self.compat_decomp, "Unexpected: CJK compat variant is unnormalized (compat)"
self.cjk_compat_variants_fully_decomp[char_int] = cjk_compat_variant_parts
def _load_norm_props(self):
props = collections.defaultdict(list)
@ -178,11 +242,6 @@ class UnicodeData(object):
The upshot is that decomposition code is very simple and easy to inline
at mild code size cost.
"""
# Constants from Unicode 9.0.0 Section 3.12 Conjoining Jamo Behavior
# http://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#M9.32468.Heading.310.Combining.Jamo.Behavior
S_BASE, L_COUNT, V_COUNT, T_COUNT = 0xAC00, 19, 21, 28
S_COUNT = L_COUNT * V_COUNT * T_COUNT
def _decompose(char_int, compatible):
# 7-bit ASCII never decomposes
if char_int <= 0x7f:
@ -234,7 +293,7 @@ class UnicodeData(object):
# need to store their overlap when they agree. When they don't agree,
# store the decomposition in the compatibility table since we'll check
# that first when normalizing to NFKD.
assert canon_fully_decomp <= compat_fully_decomp
assert set(canon_fully_decomp) <= set(compat_fully_decomp)
for ch in set(canon_fully_decomp) & set(compat_fully_decomp):
if canon_fully_decomp[ch] == compat_fully_decomp[ch]:
@ -284,47 +343,57 @@ class UnicodeData(object):
return leading_nonstarters, trailing_nonstarters
hexify = lambda c: hex(c)[2:].upper().rjust(4, '0')
hexify = lambda c: '{:04X}'.format(c)
# Test whether `first` and `last` are corresponding "<..., First>" and
# "<..., Last>" markers.
def is_first_and_last(first, last):
if not first.startswith('<') or not first.endswith(', First>'):
return False
if not last.startswith('<') or not last.endswith(', Last>'):
return False
return first[1:-8] == last[1:-7]
def gen_mph_data(name, d, kv_type, kv_callback):
(salt, keys) = minimal_perfect_hash(d)
out.write("pub(crate) const %s_SALT: &[u16] = &[\n" % name.upper())
for s in salt:
out.write(" 0x{:x},\n".format(s))
out.write("];\n")
out.write("pub(crate) const {}_KV: &[{}] = &[\n".format(name.upper(), kv_type))
for k in keys:
out.write(" {},\n".format(kv_callback(k)))
out.write("];\n\n")
def gen_combining_class(combining_classes, out):
out.write("#[inline]\n")
out.write("pub fn canonical_combining_class(c: char) -> u8 {\n")
out.write(" match c {\n")
for char, combining_class in sorted(combining_classes.items()):
out.write(" '\u{%s}' => %s,\n" % (hexify(char), combining_class))
out.write(" _ => 0,\n")
out.write(" }\n")
out.write("}\n")
gen_mph_data('canonical_combining_class', combining_classes, 'u32',
lambda k: "0x{:X}".format(int(combining_classes[k]) | (k << 8)))
def gen_composition_table(canon_comp, out):
out.write("#[inline]\n")
out.write("pub fn composition_table(c1: char, c2: char) -> Option<char> {\n")
out.write(" match (c1, c2) {\n")
table = {}
for (c1, c2), c3 in canon_comp.items():
if c1 < 0x10000 and c2 < 0x10000:
table[(c1 << 16) | c2] = c3
(salt, keys) = minimal_perfect_hash(table)
gen_mph_data('COMPOSITION_TABLE', table, '(u32, char)',
lambda k: "(0x%s, '\\u{%s}')" % (hexify(k), hexify(table[k])))
out.write("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n")
out.write(" match (c1, c2) {\n")
for (c1, c2), c3 in sorted(canon_comp.items()):
out.write(" ('\u{%s}', '\u{%s}') => Some('\u{%s}'),\n" % (hexify(c1), hexify(c2), hexify(c3)))
if c1 >= 0x10000 and c2 >= 0x10000:
out.write(" ('\\u{%s}', '\\u{%s}') => Some('\\u{%s}'),\n" % (hexify(c1), hexify(c2), hexify(c3)))
out.write(" _ => None,\n")
out.write(" }\n")
out.write("}\n")
def gen_decomposition_tables(canon_decomp, compat_decomp, out):
tables = [(canon_decomp, 'canonical'), (compat_decomp, 'compatibility')]
def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_decomp, out):
tables = [(canon_decomp, 'canonical'), (compat_decomp, 'compatibility'), (cjk_compat_variants_decomp, 'cjk_compat_variants')]
for table, name in tables:
out.write("#[inline]\n")
out.write("pub fn %s_fully_decomposed(c: char) -> Option<&'static [char]> {\n" % name)
out.write(" match c {\n")
for char, chars in sorted(table.items()):
d = ", ".join("'\u{%s}'" % hexify(c) for c in chars)
out.write(" '\u{%s}' => Some(&[%s]),\n" % (hexify(char), d))
out.write(" _ => None,\n")
out.write(" }\n")
out.write("}\n")
out.write("\n")
gen_mph_data(name + '_decomposed', table, "(u32, &'static [char])",
lambda k: "(0x{:x}, &[{}])".format(k,
", ".join("'\\u{%s}'" % hexify(c) for c in table[k])))
def gen_qc_match(prop_table, out):
out.write(" match c {\n")
@ -343,51 +412,76 @@ def gen_qc_match(prop_table, out):
def gen_nfc_qc(prop_tables, out):
out.write("#[inline]\n")
out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
out.write("pub fn qc_nfc(c: char) -> IsNormalized {\n")
gen_qc_match(prop_tables['NFC_QC'], out)
out.write("}\n")
def gen_nfkc_qc(prop_tables, out):
out.write("#[inline]\n")
out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
out.write("pub fn qc_nfkc(c: char) -> IsNormalized {\n")
gen_qc_match(prop_tables['NFKC_QC'], out)
out.write("}\n")
def gen_nfd_qc(prop_tables, out):
out.write("#[inline]\n")
out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
out.write("pub fn qc_nfd(c: char) -> IsNormalized {\n")
gen_qc_match(prop_tables['NFD_QC'], out)
out.write("}\n")
def gen_combining_mark(general_category_mark, out):
def gen_nfkd_qc(prop_tables, out):
out.write("#[inline]\n")
out.write("pub fn is_combining_mark(c: char) -> bool {\n")
out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
out.write("pub fn qc_nfkd(c: char) -> IsNormalized {\n")
gen_qc_match(prop_tables['NFKD_QC'], out)
out.write("}\n")
def gen_combining_mark(general_category_mark, out):
gen_mph_data('combining_mark', general_category_mark, 'u32',
lambda k: '0x{:04x}'.format(k))
def gen_public_assigned(general_category_public_assigned, out):
# This could be done as a hash but the table is somewhat small.
out.write("#[inline]\n")
out.write("pub fn is_public_assigned(c: char) -> bool {\n")
out.write(" match c {\n")
for char in general_category_mark:
out.write(" '\u{%s}' => true,\n" % hexify(char))
start = True
for first, last in general_category_public_assigned:
if start:
out.write(" ")
start = False
else:
out.write(" | ")
if first == last:
out.write("'\\u{%s}'\n" % hexify(first))
else:
out.write("'\\u{%s}'..='\\u{%s}'\n" % (hexify(first), hexify(last)))
out.write(" => true,\n")
out.write(" _ => false,\n")
out.write(" }\n")
out.write("}\n")
out.write("\n")
def gen_stream_safe(leading, trailing, out):
# This could be done as a hash but the table is very small.
out.write("#[inline]\n")
out.write("pub fn stream_safe_leading_nonstarters(c: char) -> usize {\n")
out.write(" match c {\n")
for char, num_leading in leading.items():
out.write(" '\u{%s}' => %d,\n" % (hexify(char), num_leading))
for char, num_leading in sorted(leading.items()):
out.write(" '\\u{%s}' => %d,\n" % (hexify(char), num_leading))
out.write(" _ => 0,\n")
out.write(" }\n")
out.write("}\n")
out.write("\n")
out.write("#[inline]\n")
out.write("pub fn stream_safe_trailing_nonstarters(c: char) -> usize {\n")
out.write(" match c {\n")
for char, num_trailing in trailing.items():
out.write(" '\u{%s}' => %d,\n" % (hexify(char), num_trailing))
out.write(" _ => 0,\n")
out.write(" }\n")
out.write("}\n")
gen_mph_data('trailing_nonstarters', trailing, 'u32',
lambda k: "0x{:X}".format(int(trailing[k]) | (k << 8)))
def gen_tests(tests, out):
out.write("""#[derive(Debug)]
@ -402,7 +496,7 @@ pub struct NormalizationTest {
""")
out.write("pub const NORMALIZATION_TESTS: &[NormalizationTest] = &[\n")
str_literal = lambda s: '"%s"' % "".join("\u{%s}" % c for c in s)
str_literal = lambda s: '"%s"' % "".join("\\u{%s}" % c for c in s)
for test in tests:
out.write(" NormalizationTest {\n")
@ -415,17 +509,73 @@ pub struct NormalizationTest {
out.write("];\n")
# Guaranteed to be less than n.
def my_hash(x, salt, n):
# This is hash based on the theory that multiplication is efficient
mask_32 = 0xffffffff
y = ((x + salt) * 2654435769) & mask_32
y ^= (x * 0x31415926) & mask_32
return (y * n) >> 32
# Compute minimal perfect hash function, d can be either a dict or list of keys.
def minimal_perfect_hash(d):
n = len(d)
buckets = dict((h, []) for h in range(n))
for key in d:
h = my_hash(key, 0, n)
buckets[h].append(key)
bsorted = [(len(buckets[h]), h) for h in range(n)]
bsorted.sort(reverse = True)
claimed = [False] * n
salts = [0] * n
keys = [0] * n
for (bucket_size, h) in bsorted:
# Note: the traditional perfect hashing approach would also special-case
# bucket_size == 1 here and assign any empty slot, rather than iterating
# until rehash finds an empty slot. But we're not doing that so we can
# avoid the branch.
if bucket_size == 0:
break
else:
for salt in range(1, 32768):
rehashes = [my_hash(key, salt, n) for key in buckets[h]]
# Make sure there are no rehash collisions within this bucket.
if all(not claimed[hash] for hash in rehashes):
if len(set(rehashes)) < bucket_size:
continue
salts[h] = salt
for key in buckets[h]:
rehash = my_hash(key, salt, n)
claimed[rehash] = True
keys[rehash] = key
break
if salts[h] == 0:
print("minimal perfect hashing failed")
# Note: if this happens (because of unfortunate data), then there are
# a few things that could be done. First, the hash function could be
# tweaked. Second, the bucket order could be scrambled (especially the
# singletons). Right now, the buckets are sorted, which has the advantage
# of being deterministic.
#
# As a more extreme approach, the singleton bucket optimization could be
# applied (give the direct address for singleton buckets, rather than
# relying on a rehash). That is definitely the more standard approach in
# the minimal perfect hashing literature, but in testing the branch was a
# significant slowdown.
exit(1)
return (salts, keys)
if __name__ == '__main__':
data = UnicodeData()
with open("tables.rs", "w") as out:
with open("tables.rs", "w", newline = "\n") as out:
out.write(PREAMBLE)
out.write("use quick_check::IsNormalized;\n")
out.write("use quick_check::IsNormalized::*;\n")
out.write("use crate::quick_check::IsNormalized;\n")
out.write("use crate::quick_check::IsNormalized::*;\n")
out.write("\n")
version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
out.write("#[allow(unused)]\n")
out.write("pub const UNICODE_VERSION: (u64, u64, u64) = %s;\n\n" % version)
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n\n" % version)
gen_combining_class(data.combining_classes, out)
out.write("\n")
@ -433,20 +583,29 @@ if __name__ == '__main__':
gen_composition_table(data.canon_comp, out)
out.write("\n")
gen_decomposition_tables(data.canon_fully_decomp, data.compat_fully_decomp, out)
gen_decomposition_tables(data.canon_fully_decomp, data.compat_fully_decomp, data.cjk_compat_variants_fully_decomp, out)
gen_combining_mark(data.general_category_mark, out)
out.write("\n")
gen_public_assigned(data.general_category_public_assigned, out)
out.write("\n")
gen_nfc_qc(data.norm_props, out)
out.write("\n")
gen_nfkc_qc(data.norm_props, out)
out.write("\n")
gen_nfd_qc(data.norm_props, out)
out.write("\n")
gen_nfkd_qc(data.norm_props, out)
out.write("\n")
gen_stream_safe(data.ss_leading, data.ss_trailing, out)
out.write("\n")
with open("normalization_tests.rs", "w") as out:
with open("normalization_tests.rs", "w", newline = "\n") as out:
out.write(PREAMBLE)
gen_tests(data.norm_tests, out)

third_party/rust/unicode-normalization/src/__test_api.rs (vendored, new file)

@ -0,0 +1,18 @@
// This crate comprises hacks and glue required to test private functions from tests/
//
// Keep this as slim as possible.
//
// If you're caught using this outside this crates tests/, you get to clean up the mess.
#[cfg(not(feature = "std"))]
use crate::no_std_prelude::*;
use crate::stream_safe::StreamSafe;
pub fn stream_safe(s: &str) -> String {
StreamSafe::new(s.chars()).collect()
}
pub mod quick_check {
pub use crate::quick_check::*;
}


@ -7,51 +7,52 @@
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::fmt::{self, Write};
use core::fmt::{self, Write};
use core::iter::Fuse;
use core::ops::Range;
use tinyvec::TinyVec;
#[derive(Clone)]
enum DecompositionType {
Canonical,
Compatible
Compatible,
}
/// External iterator for a string decomposition's characters.
#[derive(Clone)]
pub struct Decompositions<I> {
kind: DecompositionType,
iter: I,
done: bool,
iter: Fuse<I>,
// This buffer stores pairs of (canonical combining class, character),
// pushed onto the end in text order.
//
// It's split into two contiguous regions by the `ready` offset. The first
// `ready` pairs are sorted and ready to emit on demand. The "pending"
// suffix afterwards still needs more characters for us to be able to sort
// in canonical order and is not safe to emit.
buffer: Vec<(u8, char)>,
ready: usize,
// It's divided into up to three sections:
// 1) A prefix that is free space;
// 2) "Ready" characters which are sorted and ready to emit on demand;
// 3) A "pending" block which stills needs more characters for us to be able
// to sort in canonical order and is not safe to emit.
buffer: TinyVec<[(u8, char); 4]>,
ready: Range<usize>,
}
#[inline]
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
pub fn new_canonical<I: Iterator<Item = char>>(iter: I) -> Decompositions<I> {
Decompositions {
kind: self::DecompositionType::Canonical,
iter: iter,
done: false,
buffer: Vec::new(),
ready: 0,
iter: iter.fuse(),
buffer: TinyVec::new(),
ready: 0..0,
}
}
#[inline]
pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
pub fn new_compatible<I: Iterator<Item = char>>(iter: I) -> Decompositions<I> {
Decompositions {
kind: self::DecompositionType::Compatible,
iter: iter,
done: false,
buffer: Vec::new(),
ready: 0,
iter: iter.fuse(),
buffer: TinyVec::new(),
ready: 0..0,
}
}
@ -59,55 +60,89 @@ impl<I> Decompositions<I> {
#[inline]
fn push_back(&mut self, ch: char) {
let class = super::char::canonical_combining_class(ch);
if class == 0 {
self.sort_pending();
self.buffer.push((class, ch));
self.ready.end = self.buffer.len();
} else {
self.buffer.push((class, ch));
}
self.buffer.push((class, ch));
}
#[inline]
fn sort_pending(&mut self) {
if self.ready == 0 && self.buffer.is_empty() {
return;
}
// NB: `sort_by_key` is stable, so it will preserve the original text's
// order within a combining class.
self.buffer[self.ready..].sort_by_key(|k| k.0);
self.ready = self.buffer.len();
self.buffer[self.ready.end..].sort_by_key(|k| k.0);
}
#[inline]
fn pop_front(&mut self) -> Option<char> {
if self.ready == 0 {
None
fn reset_buffer(&mut self) {
// Equivalent to `self.buffer.drain(0..self.ready.end)`
// but faster than drain() if the buffer is a SmallVec or TinyVec
let pending = self.buffer.len() - self.ready.end;
for i in 0..pending {
self.buffer[i] = self.buffer[i + self.ready.end];
}
self.buffer.truncate(pending);
self.ready = 0..0;
}
#[inline]
fn increment_next_ready(&mut self) {
let next = self.ready.start + 1;
if next == self.ready.end {
self.reset_buffer();
} else {
self.ready -= 1;
Some(self.buffer.remove(0).1)
self.ready.start = next;
}
}
}
impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
impl<I: Iterator<Item = char>> Iterator for Decompositions<I> {
type Item = char;
#[inline]
fn next(&mut self) -> Option<char> {
while self.ready == 0 && !self.done {
while self.ready.end == 0 {
match (self.iter.next(), &self.kind) {
(Some(ch), &DecompositionType::Canonical) => {
super::char::decompose_canonical(ch, |d| self.push_back(d));
},
}
(Some(ch), &DecompositionType::Compatible) => {
super::char::decompose_compatible(ch, |d| self.push_back(d));
},
}
(None, _) => {
self.sort_pending();
self.done = true;
},
if self.buffer.is_empty() {
return None;
} else {
self.sort_pending();
self.ready.end = self.buffer.len();
// This implementation means that we can call `next`
// on an exhausted iterator; the last outer `next` call
// will result in an inner `next` call. To make this
// safe, we use `fuse`.
break;
}
}
}
}
self.pop_front()
// We can assume here that, if `self.ready.end` is greater than zero,
// it's also greater than `self.ready.start`. That's because we only
// increment `self.ready.start` inside `increment_next_ready`, and
// whenever it reaches equality with `self.ready.end`, we reset both
// to zero, maintaining the invariant that:
// self.ready.start < self.ready.end || self.ready.end == self.ready.start == 0
//
// This less-than-obviously-safe implementation is chosen for performance,
// minimizing the number & complexity of branches in `next` in the common
// case of buffering then unbuffering a single character with each call.
let (_, ch) = self.buffer[self.ready.start];
self.increment_next_ready();
Some(ch)
}
fn size_hint(&self) -> (usize, Option<usize>) {
@ -116,7 +151,7 @@ impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
}
}
impl<I: Iterator<Item=char> + Clone> fmt::Display for Decompositions<I> {
impl<I: Iterator<Item = char> + Clone> fmt::Display for Decompositions<I> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for c in self.clone() {
f.write_char(c)?;


@ -34,81 +34,103 @@
//!
//! ```toml
//! [dependencies]
//! unicode-normalization = "0.1.7"
//! unicode-normalization = "0.1.19"
//! ```
#![deny(missing_docs, unsafe_code)]
#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
#![doc(
html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
)]
#![cfg_attr(not(feature = "std"), no_std)]
pub use tables::UNICODE_VERSION;
pub use decompose::Decompositions;
pub use quick_check::{
#[cfg(not(feature = "std"))]
extern crate alloc;
#[cfg(feature = "std")]
extern crate core;
extern crate tinyvec;
pub use crate::decompose::Decompositions;
pub use crate::quick_check::{
is_nfc, is_nfc_quick, is_nfc_stream_safe, is_nfc_stream_safe_quick, is_nfd, is_nfd_quick,
is_nfd_stream_safe, is_nfd_stream_safe_quick, is_nfkc, is_nfkc_quick, is_nfkd, is_nfkd_quick,
IsNormalized,
is_nfc,
is_nfc_quick,
is_nfc_stream_safe,
is_nfc_stream_safe_quick,
is_nfd,
is_nfd_quick,
is_nfd_stream_safe,
is_nfd_stream_safe_quick,
};
pub use recompose::Recompositions;
pub use stream_safe::StreamSafe;
use std::str::Chars;
pub use crate::recompose::Recompositions;
pub use crate::replace::Replacements;
pub use crate::stream_safe::StreamSafe;
pub use crate::tables::UNICODE_VERSION;
use core::str::Chars;
mod no_std_prelude;
mod decompose;
mod lookups;
mod normalize;
mod recompose;
mod perfect_hash;
mod quick_check;
mod recompose;
mod replace;
mod stream_safe;
#[rustfmt::skip]
mod tables;
#[doc(hidden)]
pub mod __test_api;
#[cfg(test)]
mod test;
#[cfg(test)]
mod normalization_tests;
/// Methods for composing and decomposing characters.
pub mod char {
pub use normalize::{decompose_canonical, decompose_compatible, compose};
pub use crate::normalize::{
compose, decompose_canonical, decompose_cjk_compat_variants, decompose_compatible,
};
/// Look up the canonical combining class of a character.
pub use tables::canonical_combining_class;
pub use crate::lookups::{canonical_combining_class, is_combining_mark};
/// Return whether the given character is a combining mark (`General_Category=Mark`)
pub use tables::is_combining_mark;
/// Return whether the given character is assigned (`General_Category` != `Unassigned`)
/// and not Private-Use (`General_Category` != `Private_Use`), in the supported version
/// of Unicode.
pub use crate::tables::is_public_assigned;
}
/// Methods for iterating over strings while applying Unicode normalizations
/// as described in
/// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
pub trait UnicodeNormalization<I: Iterator<Item=char>> {
pub trait UnicodeNormalization<I: Iterator<Item = char>> {
/// Returns an iterator over the string in Unicode Normalization Form D
/// (canonical decomposition).
#[inline]
fn nfd(self) -> Decompositions<I>;
/// Returns an iterator over the string in Unicode Normalization Form KD
/// (compatibility decomposition).
#[inline]
fn nfkd(self) -> Decompositions<I>;
/// An Iterator over the string in Unicode Normalization Form C
/// (canonical decomposition followed by canonical composition).
#[inline]
fn nfc(self) -> Recompositions<I>;
/// An Iterator over the string in Unicode Normalization Form KC
/// (compatibility decomposition followed by canonical composition).
#[inline]
fn nfkc(self) -> Recompositions<I>;
/// A transformation which replaces CJK Compatibility Ideograph codepoints
/// with normal forms using Standardized Variation Sequences. This is not
/// part of the canonical or compatibility decomposition algorithms, but
/// performing it before those algorithms produces normalized output which
/// better preserves the intent of the original text.
///
/// Note that many systems today ignore variation selectors, so these
/// may not immediately help text display as intended, but they at
/// least preserve the information in a standardized form, giving
/// implementations the option to recognize them.
fn cjk_compat_variants(self) -> Replacements<I>;
/// An Iterator over the string with Conjoining Grapheme Joiner characters
/// inserted according to the Stream-Safe Text Process (UAX15-D4)
#[inline]
fn stream_safe(self) -> StreamSafe<I>;
}
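A hedged usage sketch of `cjk_compat_variants` (my own example, not from this diff; it assumes `Replacements` is a `char` iterator like the other adaptors): run it before composing so CJK Compatibility Ideographs survive as a base character plus variation selector rather than being collapsed by normalization.

```rust
use unicode_normalization::UnicodeNormalization;

fn normalize_preserving_cjk_variants(s: &str) -> String {
    // Rewrite compatibility ideographs to standardized variation sequences
    // first, then apply the usual canonical composition.
    s.chars().cjk_compat_variants().nfc().collect()
}
```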
@ -133,13 +155,18 @@ impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
recompose::new_compatible(self.chars())
}
#[inline]
fn cjk_compat_variants(self) -> Replacements<Chars<'a>> {
replace::new_cjk_compat_variants(self.chars())
}
#[inline]
fn stream_safe(self) -> StreamSafe<Chars<'a>> {
StreamSafe::new(self.chars())
}
}
impl<I: Iterator<Item=char>> UnicodeNormalization<I> for I {
impl<I: Iterator<Item = char>> UnicodeNormalization<I> for I {
#[inline]
fn nfd(self) -> Decompositions<I> {
decompose::new_canonical(self)
@ -160,6 +187,11 @@ impl<I: Iterator<Item=char>> UnicodeNormalization<I> for I {
recompose::new_compatible(self)
}
#[inline]
fn cjk_compat_variants(self) -> Replacements<I> {
replace::new_cjk_compat_variants(self)
}
#[inline]
fn stream_safe(self) -> StreamSafe<I> {
StreamSafe::new(self)

third_party/rust/unicode-normalization/src/lookups.rs (vendored, new file)

@ -0,0 +1,135 @@
// Copyright 2019 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Lookups of unicode properties using minimal perfect hashing.
use crate::perfect_hash::mph_lookup;
use crate::tables::*;
/// Look up the canonical combining class for a codepoint.
///
/// The value returned is as defined in the Unicode Character Database.
pub fn canonical_combining_class(c: char) -> u8 {
mph_lookup(
c.into(),
CANONICAL_COMBINING_CLASS_SALT,
CANONICAL_COMBINING_CLASS_KV,
u8_lookup_fk,
u8_lookup_fv,
0,
)
}
pub(crate) fn composition_table(c1: char, c2: char) -> Option<char> {
if c1 < '\u{10000}' && c2 < '\u{10000}' {
mph_lookup(
(c1 as u32) << 16 | (c2 as u32),
COMPOSITION_TABLE_SALT,
COMPOSITION_TABLE_KV,
pair_lookup_fk,
pair_lookup_fv_opt,
None,
)
} else {
composition_table_astral(c1, c2)
}
}
pub(crate) fn canonical_fully_decomposed(c: char) -> Option<&'static [char]> {
mph_lookup(
c.into(),
CANONICAL_DECOMPOSED_SALT,
CANONICAL_DECOMPOSED_KV,
pair_lookup_fk,
pair_lookup_fv_opt,
None,
)
}
pub(crate) fn compatibility_fully_decomposed(c: char) -> Option<&'static [char]> {
mph_lookup(
c.into(),
COMPATIBILITY_DECOMPOSED_SALT,
COMPATIBILITY_DECOMPOSED_KV,
pair_lookup_fk,
pair_lookup_fv_opt,
None,
)
}
pub(crate) fn cjk_compat_variants_fully_decomposed(c: char) -> Option<&'static [char]> {
mph_lookup(
c.into(),
CJK_COMPAT_VARIANTS_DECOMPOSED_SALT,
CJK_COMPAT_VARIANTS_DECOMPOSED_KV,
pair_lookup_fk,
pair_lookup_fv_opt,
None,
)
}
/// Return whether the given character is a combining mark (`General_Category=Mark`)
pub fn is_combining_mark(c: char) -> bool {
mph_lookup(
c.into(),
COMBINING_MARK_SALT,
COMBINING_MARK_KV,
bool_lookup_fk,
bool_lookup_fv,
false,
)
}
pub fn stream_safe_trailing_nonstarters(c: char) -> usize {
mph_lookup(
c.into(),
TRAILING_NONSTARTERS_SALT,
TRAILING_NONSTARTERS_KV,
u8_lookup_fk,
u8_lookup_fv,
0,
) as usize
}
/// Extract the key in a 24 bit key and 8 bit value packed in a u32.
#[inline]
fn u8_lookup_fk(kv: u32) -> u32 {
kv >> 8
}
/// Extract the value in a 24 bit key and 8 bit value packed in a u32.
#[inline]
fn u8_lookup_fv(kv: u32) -> u8 {
(kv & 0xff) as u8
}
/// Extract the key for a boolean lookup.
#[inline]
fn bool_lookup_fk(kv: u32) -> u32 {
kv
}
/// Extract the value for a boolean lookup.
#[inline]
fn bool_lookup_fv(_kv: u32) -> bool {
true
}
/// Extract the key in a pair.
#[inline]
fn pair_lookup_fk<T>(kv: (u32, T)) -> u32 {
kv.0
}
/// Extract the value in a pair, returning an option.
#[inline]
fn pair_lookup_fv_opt<T>(kv: (u32, T)) -> Option<T> {
Some(kv.1)
}

third_party/rust/unicode-normalization/src/no_std_prelude.rs (vendored, new file)

@ -0,0 +1,6 @@
#[cfg(not(feature = "std"))]
pub use alloc::{
str::Chars,
string::{String, ToString},
vec::Vec,
};

The diff for this file is not shown because of its size.


@ -9,16 +9,22 @@
// except according to those terms.
//! Functions for computing canonical and compatible decompositions for Unicode characters.
use std::char;
use std::ops::FnMut;
use tables;
use crate::lookups::{
canonical_fully_decomposed, cjk_compat_variants_fully_decomposed,
compatibility_fully_decomposed, composition_table,
};
use core::{char, ops::FnMut};
/// Compute canonical Unicode decomposition for character.
/// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/)
/// for more information.
#[inline]
pub fn decompose_canonical<F>(c: char, emit_char: F) where F: FnMut(char) {
decompose(c, tables::canonical_fully_decomposed, emit_char)
pub fn decompose_canonical<F>(c: char, emit_char: F)
where
F: FnMut(char),
{
decompose(c, canonical_fully_decomposed, emit_char)
}
/// Compute canonical or compatible Unicode decomposition for character.
@ -26,14 +32,49 @@ pub fn decompose_canonical<F>(c: char, emit_char: F) where F: FnMut(char) {
/// for more information.
#[inline]
pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) {
let decompose_char = |c| tables::compatibility_fully_decomposed(c)
.or_else(|| tables::canonical_fully_decomposed(c));
let decompose_char =
|c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c));
decompose(c, decompose_char, emit_char)
}
/// Compute standard-variation decomposition for character.
///
/// [Standardized Variation Sequences] are used instead of the standard canonical
/// decompositions, notably for CJK codepoints with singleton canonical decompositions,
/// to avoid losing information. See the
/// [Unicode Variation Sequence FAQ](http://unicode.org/faq/vs.html) and the
/// "Other Enhancements" section of the
/// [Unicode 6.3 Release Summary](https://www.unicode.org/versions/Unicode6.3.0/#Summary)
/// for more information.
#[inline]
pub fn decompose_cjk_compat_variants<F>(c: char, mut emit_char: F)
where
F: FnMut(char),
{
// 7-bit ASCII never decomposes
if c <= '\x7f' {
emit_char(c);
return;
}
// Don't perform decomposition for Hangul
if let Some(decomposed) = cjk_compat_variants_fully_decomposed(c) {
for &d in decomposed {
emit_char(d);
}
return;
}
// Finally bottom out.
emit_char(c);
}
#[inline]
fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F)
where D: Fn(char) -> Option<&'static [char]>, F: FnMut(char)
where
D: Fn(char) -> Option<&'static [char]>,
F: FnMut(char),
{
// 7-bit ASCII never decomposes
if c <= '\x7f' {
@ -62,7 +103,7 @@ fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F)
/// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/)
/// for more information.
pub fn compose(a: char, b: char) -> Option<char> {
compose_hangul(a, b).or_else(|| tables::composition_table(a, b))
compose_hangul(a, b).or_else(|| composition_table(a, b))
}
// Constants from Unicode 9.0.0 Section 3.12 Conjoining Jamo Behavior
@ -74,8 +115,8 @@ const T_BASE: u32 = 0x11A7;
const L_COUNT: u32 = 19;
const V_COUNT: u32 = 21;
const T_COUNT: u32 = 28;
const N_COUNT: u32 = (V_COUNT * T_COUNT);
const S_COUNT: u32 = (L_COUNT * N_COUNT);
const N_COUNT: u32 = V_COUNT * T_COUNT;
const S_COUNT: u32 = L_COUNT * N_COUNT;
const S_LAST: u32 = S_BASE + S_COUNT - 1;
const L_LAST: u32 = L_BASE + L_COUNT - 1;
@ -93,7 +134,10 @@ pub(crate) fn is_hangul_syllable(c: char) -> bool {
// Decompose a precomposed Hangul syllable
#[allow(unsafe_code)]
#[inline(always)]
fn decompose_hangul<F>(s: char, mut emit_char: F) where F: FnMut(char) {
fn decompose_hangul<F>(s: char, mut emit_char: F)
where
F: FnMut(char),
{
let s_index = s as u32 - S_BASE;
let l_index = s_index / N_COUNT;
unsafe {
@ -113,27 +157,32 @@ fn decompose_hangul<F>(s: char, mut emit_char: F) where F: FnMut(char) {
pub(crate) fn hangul_decomposition_length(s: char) -> usize {
let si = s as u32 - S_BASE;
let ti = si % T_COUNT;
if ti > 0 { 3 } else { 2 }
if ti > 0 {
3
} else {
2
}
}
// Compose a pair of Hangul Jamo
#[allow(unsafe_code)]
#[inline(always)]
#[allow(ellipsis_inclusive_range_patterns)]
fn compose_hangul(a: char, b: char) -> Option<char> {
let (a, b) = (a as u32, b as u32);
match (a, b) {
// Compose a leading consonant and a vowel together into an LV_Syllable
(L_BASE ... L_LAST, V_BASE ... V_LAST) => {
(L_BASE...L_LAST, V_BASE...V_LAST) => {
let l_index = a - L_BASE;
let v_index = b - V_BASE;
let lv_index = l_index * N_COUNT + v_index * T_COUNT;
let s = S_BASE + lv_index;
Some(unsafe {char::from_u32_unchecked(s)})
},
Some(unsafe { char::from_u32_unchecked(s) })
}
// Compose an LV_Syllable and a trailing consonant into an LVT_Syllable
(S_BASE ... S_LAST, T_FIRST ... T_LAST) if (a - S_BASE) % T_COUNT == 0 => {
Some(unsafe {char::from_u32_unchecked(a + (b - T_BASE))})
},
(S_BASE...S_LAST, T_FIRST...T_LAST) if (a - S_BASE) % T_COUNT == 0 => {
Some(unsafe { char::from_u32_unchecked(a + (b - T_BASE)) })
}
_ => None,
}
}

third_party/rust/unicode-normalization/src/perfect_hash.rs (vendored, new file)

@ -0,0 +1,50 @@
// Copyright 2019 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Support for lookups based on minimal perfect hashing.
// This function is based on multiplication being fast and is "good enough". Also
// it can share some work between the unsalted and salted versions.
#[inline]
fn my_hash(key: u32, salt: u32, n: usize) -> usize {
let y = key.wrapping_add(salt).wrapping_mul(2654435769);
let y = y ^ key.wrapping_mul(0x31415926);
(((y as u64) * (n as u64)) >> 32) as usize
}
/// Do a lookup using minimal perfect hashing.
///
/// The table is stored as a sequence of "salt" values, then a sequence of
/// values that contain packed key/value pairs. The strategy is to hash twice.
/// The first hash retrieves a salt value that makes the second hash unique.
/// The hash function doesn't have to be very good, just good enough that the
/// resulting map is unique.
#[inline]
pub(crate) fn mph_lookup<KV, V, FK, FV>(
x: u32,
salt: &[u16],
kv: &[KV],
fk: FK,
fv: FV,
default: V,
) -> V
where
KV: Copy,
FK: Fn(KV) -> u32,
FV: Fn(KV) -> V,
{
let s = salt[my_hash(x, 0, salt.len())] as u32;
let key_val = kv[my_hash(x, s, salt.len())];
if x == fk(key_val) {
fv(key_val)
} else {
default
}
}
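mph_lookup is crate-private, so the sketch below copies my_hash and mph_lookup from above and drives them with a hypothetical one-entry table (a single-slot table is trivially a minimal perfect hash, since both hashes can only return index 0). The (key << 8) | value packing and the fk/fv splitters mirror how the generated tables appear to pack small values, but they are assumptions here, not the crate's actual tables:

fn my_hash(key: u32, salt: u32, n: usize) -> usize {
    let y = key.wrapping_add(salt).wrapping_mul(2654435769);
    let y = y ^ key.wrapping_mul(0x31415926);
    (((y as u64) * (n as u64)) >> 32) as usize
}

fn mph_lookup<KV: Copy, V>(
    x: u32,
    salt: &[u16],
    kv: &[KV],
    fk: impl Fn(KV) -> u32,
    fv: impl Fn(KV) -> V,
    default: V,
) -> V {
    let s = salt[my_hash(x, 0, salt.len())] as u32;
    let key_val = kv[my_hash(x, s, salt.len())];
    if x == fk(key_val) {
        fv(key_val)
    } else {
        default
    }
}

fn main() {
    // One packed entry: U+0301 COMBINING ACUTE ACCENT with combining class 230.
    let salt = [0u16];
    let kv = [(0x0301u32 << 8) | 230];
    let ccc = |c: u32| mph_lookup(c, &salt, &kv, |kv| kv >> 8, |kv| (kv & 0xff) as u8, 0);
    assert_eq!(ccc(0x0301), 230); // key matches, value is unpacked
    assert_eq!(ccc(0x0041), 0); // 'A' is absent, so the default is returned
}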

third_party/rust/unicode-normalization/src/quick_check.rs

@ -1,6 +1,7 @@
use UnicodeNormalization;
use stream_safe;
use tables;
use crate::lookups::canonical_combining_class;
use crate::stream_safe;
use crate::tables;
use crate::UnicodeNormalization;
/// The QuickCheck algorithm can quickly determine if a text is or isn't
/// normalized without any allocations in many cases, but it has to be able to
@ -18,7 +19,9 @@ pub enum IsNormalized {
// https://unicode.org/reports/tr15/#Detecting_Normalization_Forms
#[inline]
fn quick_check<F, I>(s: I, is_allowed: F, stream_safe: bool) -> IsNormalized
where I: Iterator<Item=char>, F: Fn(char) -> IsNormalized
where
I: Iterator<Item = char>,
F: Fn(char) -> IsNormalized,
{
let mut last_cc = 0u8;
let mut nonstarter_count = 0;
@ -32,7 +35,7 @@ fn quick_check<F, I>(s: I, is_allowed: F, stream_safe: bool) -> IsNormalized
}
// Otherwise, lookup the combining class and QC property
let cc = tables::canonical_combining_class(ch);
let cc = canonical_combining_class(ch);
if last_cc > cc && cc != 0 {
return IsNormalized::No;
}
@ -41,7 +44,7 @@ fn quick_check<F, I>(s: I, is_allowed: F, stream_safe: bool) -> IsNormalized
IsNormalized::No => return IsNormalized::No,
IsNormalized::Maybe => {
result = IsNormalized::Maybe;
},
}
}
if stream_safe {
let decomp = stream_safe::classify_nonstarters(ch);
@ -66,25 +69,37 @@ fn quick_check<F, I>(s: I, is_allowed: F, stream_safe: bool) -> IsNormalized
/// `IsNormalized::Maybe` if further checks are necessary. In this case a check
/// like `s.chars().nfc().eq(s.chars())` should suffice.
#[inline]
pub fn is_nfc_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
pub fn is_nfc_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
quick_check(s, tables::qc_nfc, false)
}
/// Quickly check if a string is in NFKC.
#[inline]
pub fn is_nfkc_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
quick_check(s, tables::qc_nfkc, false)
}
/// Quickly check if a string is in NFD.
#[inline]
pub fn is_nfd_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
pub fn is_nfd_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
quick_check(s, tables::qc_nfd, false)
}
/// Quickly check if a string is in NFKD.
#[inline]
pub fn is_nfkd_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
quick_check(s, tables::qc_nfkd, false)
}
/// Quickly check if a string is Stream-Safe NFC.
#[inline]
pub fn is_nfc_stream_safe_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
pub fn is_nfc_stream_safe_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
quick_check(s, tables::qc_nfc, true)
}
/// Quickly check if a string is Stream-Safe NFD.
#[inline]
pub fn is_nfd_stream_safe_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
pub fn is_nfd_stream_safe_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
quick_check(s, tables::qc_nfd, true)
}
@ -98,6 +113,16 @@ pub fn is_nfc(s: &str) -> bool {
}
}
/// Authoritatively check if a string is in NFKC.
#[inline]
pub fn is_nfkc(s: &str) -> bool {
match is_nfkc_quick(s.chars()) {
IsNormalized::Yes => true,
IsNormalized::No => false,
IsNormalized::Maybe => s.chars().eq(s.chars().nfkc()),
}
}
/// Authoritatively check if a string is in NFD.
#[inline]
pub fn is_nfd(s: &str) -> bool {
@ -108,6 +133,16 @@ pub fn is_nfd(s: &str) -> bool {
}
}
/// Authoritatively check if a string is in NFKD.
#[inline]
pub fn is_nfkd(s: &str) -> bool {
match is_nfkd_quick(s.chars()) {
IsNormalized::Yes => true,
IsNormalized::No => false,
IsNormalized::Maybe => s.chars().eq(s.chars().nfkd()),
}
}
/// Authoritatively check if a string is Stream-Safe NFC.
#[inline]
pub fn is_nfc_stream_safe(s: &str) -> bool {
@ -130,11 +165,7 @@ pub fn is_nfd_stream_safe(s: &str) -> bool {
#[cfg(test)]
mod tests {
use super::{
IsNormalized,
is_nfc_stream_safe_quick,
is_nfd_stream_safe_quick,
};
use super::{is_nfc_stream_safe_quick, is_nfd_stream_safe_quick, IsNormalized};
#[test]
fn test_stream_safe_nfd() {
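A sketch of how a caller combines the quick check with the authoritative fallback suggested above, assuming is_nfc_quick, IsNormalized and the UnicodeNormalization trait are re-exported at the crate root as in upstream unicode-normalization:

use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};

fn is_nfc_checked(s: &str) -> bool {
    match is_nfc_quick(s.chars()) {
        IsNormalized::Yes => true,
        IsNormalized::No => false,
        // Maybe: fall back to the full comparison against the NFC form.
        IsNormalized::Maybe => s.chars().eq(s.chars().nfc()),
    }
}

fn main() {
    assert!(is_nfc_checked("abc"));
    // 'A' + COMBINING RING ABOVE is not NFC; it composes to U+00C5 'Å'.
    assert!(!is_nfc_checked("A\u{30a}"));
}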

third_party/rust/unicode-normalization/src/recompose.rs

@ -8,15 +8,15 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::collections::VecDeque;
use std::fmt::{self, Write};
use decompose::Decompositions;
use crate::decompose::Decompositions;
use core::fmt::{self, Write};
use tinyvec::TinyVec;
#[derive(Clone)]
enum RecompositionState {
Composing,
Purging,
Finished
Purging(usize),
Finished(usize),
}
/// External iterator for a string recomposition's characters.
@ -24,34 +24,34 @@ enum RecompositionState {
pub struct Recompositions<I> {
iter: Decompositions<I>,
state: RecompositionState,
buffer: VecDeque<char>,
buffer: TinyVec<[char; 4]>,
composee: Option<char>,
last_ccc: Option<u8>
last_ccc: Option<u8>,
}
#[inline]
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> {
pub fn new_canonical<I: Iterator<Item = char>>(iter: I) -> Recompositions<I> {
Recompositions {
iter: super::decompose::new_canonical(iter),
state: self::RecompositionState::Composing,
buffer: VecDeque::new(),
buffer: TinyVec::new(),
composee: None,
last_ccc: None,
}
}
#[inline]
pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> {
pub fn new_compatible<I: Iterator<Item = char>>(iter: I) -> Recompositions<I> {
Recompositions {
iter: super::decompose::new_compatible(iter),
state: self::RecompositionState::Composing,
buffer: VecDeque::new(),
buffer: TinyVec::new(),
composee: None,
last_ccc: None,
}
}
impl<I: Iterator<Item=char>> Iterator for Recompositions<I> {
impl<I: Iterator<Item = char>> Iterator for Recompositions<I> {
type Item = char;
#[inline]
@ -70,36 +70,34 @@ impl<I: Iterator<Item=char>> Iterator for Recompositions<I> {
}
self.composee = Some(ch);
continue;
},
}
Some(k) => k,
};
match self.last_ccc {
None => {
match super::char::compose(k, ch) {
Some(r) => {
self.composee = Some(r);
continue;
}
None => {
if ch_class == 0 {
self.composee = Some(ch);
return Some(k);
}
self.buffer.push_back(ch);
self.last_ccc = Some(ch_class);
}
None => match super::char::compose(k, ch) {
Some(r) => {
self.composee = Some(r);
continue;
}
}
None => {
if ch_class == 0 {
self.composee = Some(ch);
return Some(k);
}
self.buffer.push(ch);
self.last_ccc = Some(ch_class);
}
},
Some(l_class) => {
if l_class >= ch_class {
// `ch` is blocked from `composee`
if ch_class == 0 {
self.composee = Some(ch);
self.last_ccc = None;
self.state = Purging;
self.state = Purging(0);
return Some(k);
}
self.buffer.push_back(ch);
self.buffer.push(ch);
self.last_ccc = Some(ch_class);
continue;
}
@ -109,36 +107,44 @@ impl<I: Iterator<Item=char>> Iterator for Recompositions<I> {
continue;
}
None => {
self.buffer.push_back(ch);
self.buffer.push(ch);
self.last_ccc = Some(ch_class);
}
}
}
}
}
self.state = Finished;
self.state = Finished(0);
if self.composee.is_some() {
return self.composee.take();
}
}
Purging => {
match self.buffer.pop_front() {
None => self.state = Composing,
s => return s
Purging(next) => match self.buffer.get(next).cloned() {
None => {
self.buffer.clear();
self.state = Composing;
}
}
Finished => {
match self.buffer.pop_front() {
None => return self.composee.take(),
s => return s
s => {
self.state = Purging(next + 1);
return s;
}
}
},
Finished(next) => match self.buffer.get(next).cloned() {
None => {
self.buffer.clear();
return self.composee.take();
}
s => {
self.state = Finished(next + 1);
return s;
}
},
}
}
}
}
impl<I: Iterator<Item=char> + Clone> fmt::Display for Recompositions<I> {
impl<I: Iterator<Item = char> + Clone> fmt::Display for Recompositions<I> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for c in self.clone() {
f.write_char(c)?;
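The recomposition state machine above is what backs the nfc()/nfkc() adapters. A small sketch of the observable behavior, assuming the UnicodeNormalization trait from upstream unicode-normalization is in scope:

use unicode_normalization::UnicodeNormalization;

fn main() {
    // 'e' + COMBINING ACUTE ACCENT recomposes to the precomposed 'é'.
    assert_eq!("e\u{301}".nfc().collect::<String>(), "\u{e9}");
    // A second acute cannot compose with 'é', so it is buffered and
    // emitted after the composee.
    assert_eq!("e\u{301}\u{301}".nfc().collect::<String>(), "\u{e9}\u{301}");
}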

third_party/rust/unicode-normalization/src/replace.rs (vendored, new file, 61 lines)

@ -0,0 +1,61 @@
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use core::fmt::{self, Write};
use tinyvec::ArrayVec;
/// External iterator for replacements for a string's characters.
#[derive(Clone)]
pub struct Replacements<I> {
iter: I,
// At this time, the longest replacement sequence has length 2, so we just
// need buffer space for 1 codepoint.
buffer: Option<char>,
}
#[inline]
pub fn new_cjk_compat_variants<I: Iterator<Item = char>>(iter: I) -> Replacements<I> {
Replacements { iter, buffer: None }
}
impl<I: Iterator<Item = char>> Iterator for Replacements<I> {
type Item = char;
#[inline]
fn next(&mut self) -> Option<char> {
if let Some(c) = self.buffer.take() {
return Some(c);
}
match self.iter.next() {
Some(ch) => {
// At this time, the longest replacement sequence has length 2.
let mut buffer = ArrayVec::<[char; 2]>::new();
super::char::decompose_cjk_compat_variants(ch, |d| buffer.push(d));
self.buffer = buffer.get(1).copied();
Some(buffer[0])
}
None => None,
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
let (lower, _) = self.iter.size_hint();
(lower, None)
}
}
impl<I: Iterator<Item = char> + Clone> fmt::Display for Replacements<I> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for c in self.clone() {
f.write_char(c)?;
}
Ok(())
}
}

third_party/rust/unicode-normalization/src/stream_safe.rs

@ -1,8 +1,9 @@
use normalize::{
hangul_decomposition_length,
is_hangul_syllable,
use crate::lookups::{
canonical_combining_class, canonical_fully_decomposed, compatibility_fully_decomposed,
stream_safe_trailing_nonstarters,
};
use tables;
use crate::normalize::{hangul_decomposition_length, is_hangul_syllable};
use crate::tables::stream_safe_leading_nonstarters;
pub(crate) const MAX_NONSTARTERS: usize = 30;
const COMBINING_GRAPHEME_JOINER: char = '\u{034F}';
@ -18,34 +19,39 @@ pub struct StreamSafe<I> {
impl<I> StreamSafe<I> {
pub(crate) fn new(iter: I) -> Self {
Self { iter, nonstarter_count: 0, buffer: None }
Self {
iter,
nonstarter_count: 0,
buffer: None,
}
}
}
impl<I: Iterator<Item=char>> Iterator for StreamSafe<I> {
impl<I: Iterator<Item = char>> Iterator for StreamSafe<I> {
type Item = char;
#[inline]
fn next(&mut self) -> Option<char> {
if let Some(ch) = self.buffer.take() {
return Some(ch);
}
let next_ch = match self.iter.next() {
let next_ch = match self.buffer.take().or_else(|| self.iter.next()) {
None => return None,
Some(c) => c,
};
let d = classify_nonstarters(next_ch);
if self.nonstarter_count + d.leading_nonstarters > MAX_NONSTARTERS {
self.buffer = Some(next_ch);
// Since we're emitting a CGJ, the suffix of the emitted string in NFKD has no trailing
// nonstarters, so we can reset the counter to zero. Put `next_ch` back into the
// iterator (via `self.buffer`), and we'll reclassify it next iteration.
self.nonstarter_count = 0;
self.buffer = Some(next_ch);
return Some(COMBINING_GRAPHEME_JOINER);
}
// No starters in the decomposition, so keep accumulating
// Is the character all nonstarters in NFKD? If so, increment our counter of contiguous
// nonstarters in NFKD.
if d.leading_nonstarters == d.decomposition_len {
self.nonstarter_count += d.decomposition_len;
}
// Otherwise, restart the nonstarter counter.
// Otherwise, reset the counter to the decomposition's number of trailing nonstarters.
else {
self.nonstarter_count = d.trailing_nonstarters;
}
@ -68,7 +74,7 @@ pub(crate) fn classify_nonstarters(c: char) -> Decomposition {
leading_nonstarters: 0,
trailing_nonstarters: 0,
decomposition_len: 1,
}
};
}
// Next, special case Hangul, since it's not handled by our tables.
if is_hangul_syllable(c) {
@ -78,18 +84,15 @@ pub(crate) fn classify_nonstarters(c: char) -> Decomposition {
decomposition_len: hangul_decomposition_length(c),
};
}
let decomp = tables::compatibility_fully_decomposed(c)
.or_else(|| tables::canonical_fully_decomposed(c));
let decomp = compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c));
match decomp {
Some(decomp) => {
Decomposition {
leading_nonstarters: tables::stream_safe_leading_nonstarters(c),
trailing_nonstarters: tables::stream_safe_trailing_nonstarters(c),
decomposition_len: decomp.len(),
}
Some(decomp) => Decomposition {
leading_nonstarters: stream_safe_leading_nonstarters(c),
trailing_nonstarters: stream_safe_trailing_nonstarters(c),
decomposition_len: decomp.len(),
},
None => {
let is_nonstarter = tables::canonical_combining_class(c) != 0;
let is_nonstarter = canonical_combining_class(c) != 0;
let nonstarter = if is_nonstarter { 1 } else { 0 };
Decomposition {
leading_nonstarters: nonstarter,
@ -102,35 +105,38 @@ pub(crate) fn classify_nonstarters(c: char) -> Decomposition {
#[cfg(test)]
mod tests {
use super::{
StreamSafe,
classify_nonstarters,
};
use std::char;
use normalization_tests::NORMALIZATION_TESTS;
use normalize::decompose_compatible;
use tables;
use super::{classify_nonstarters, StreamSafe};
use crate::lookups::canonical_combining_class;
use crate::normalize::decompose_compatible;
#[cfg(not(feature = "std"))]
use crate::no_std_prelude::*;
use core::char;
fn stream_safe(s: &str) -> String {
StreamSafe::new(s.chars()).collect()
}
#[test]
fn test_normalization_tests_unaffected() {
for test in NORMALIZATION_TESTS {
for &s in &[test.source, test.nfc, test.nfd, test.nfkc, test.nfkd] {
assert_eq!(stream_safe(s), s);
}
}
}
#[test]
fn test_simple() {
let technically_okay = "Da\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}ngerzone";
assert_eq!(stream_safe(technically_okay), technically_okay);
let too_much = "Da\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}\u{032e}ngerzone";
assert_ne!(stream_safe(too_much), too_much);
let fixed_it = "Da\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}\u{034f}\u{032e}ngerzone";
assert_eq!(stream_safe(too_much), fixed_it);
let woah_nelly = "Da\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}\u{032e}\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}\u{032e}ngerzone";
let its_cool = "Da\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}\u{034f}\u{032e}\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{034f}\u{031d}\u{032e}ngerzone";
assert_eq!(stream_safe(woah_nelly), its_cool);
}
#[test]
fn test_all_nonstarters() {
let s = "\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}";
let expected = "\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{034F}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}";
assert_eq!(stream_safe(s), expected);
}
#[test]
@ -142,19 +148,19 @@ mod tests {
None => continue,
};
let c = classify_nonstarters(ch);
let mut s = vec![];
let mut s = Vec::new();
decompose_compatible(ch, |c| s.push(c));
assert_eq!(s.len(), c.decomposition_len);
let num_leading = s
.iter()
.take_while(|&c| tables::canonical_combining_class(*c) != 0)
.take_while(|&c| canonical_combining_class(*c) != 0)
.count();
let num_trailing = s
.iter()
.rev()
.take_while(|&c| tables::canonical_combining_class(*c) != 0)
.take_while(|&c| canonical_combining_class(*c) != 0)
.count();
assert_eq!(num_leading, c.leading_nonstarters);
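A sketch of the stream-safe transformation from the caller's side, assuming the stream_safe() adapter on the UnicodeNormalization trait as in upstream unicode-normalization:

use unicode_normalization::UnicodeNormalization;

fn main() {
    // 31 consecutive nonstarters (COMBINING GRAVE ACCENT, combining class 230).
    let input: String = std::iter::repeat('\u{300}').take(31).collect();
    let output: String = input.chars().stream_safe().collect();
    // Once 30 nonstarters have accumulated, a CGJ (U+034F) is inserted,
    // so the 31st mark ends up after it.
    assert_eq!(output.chars().count(), 32);
    assert_eq!(output.chars().nth(30), Some('\u{034F}'));
}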

The diff for this file is not shown because of its large size.

third_party/rust/unicode-normalization/src/test.rs

@ -8,11 +8,12 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::char;
use super::UnicodeNormalization;
use super::char::is_combining_mark;
use super::UnicodeNormalization;
use core::char;
#[cfg(not(feature = "std"))]
use crate::no_std_prelude::*;
#[test]
fn test_nfd() {
@ -21,8 +22,11 @@ fn test_nfd() {
assert_eq!($input.nfd().to_string(), $expected);
// A dummy iterator that is not std::str::Chars directly;
// note that `id_func` is used to ensure `Clone` implementation
assert_eq!($input.chars().map(|c| c).nfd().collect::<String>(), $expected);
}
assert_eq!(
$input.chars().map(|c| c).nfd().collect::<String>(),
$expected
);
};
}
t!("abc", "abc");
t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
@ -41,7 +45,7 @@ fn test_nfkd() {
macro_rules! t {
($input: expr, $expected: expr) => {
assert_eq!($input.nfkd().to_string(), $expected);
}
};
}
t!("abc", "abc");
t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
@ -60,7 +64,7 @@ fn test_nfc() {
macro_rules! t {
($input: expr, $expected: expr) => {
assert_eq!($input.nfc().to_string(), $expected);
}
};
}
t!("abc", "abc");
t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
@ -72,7 +76,10 @@ fn test_nfc() {
t!("\u{301}a", "\u{301}a");
t!("\u{d4db}", "\u{d4db}");
t!("\u{ac1c}", "\u{ac1c}");
t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
t!(
"a\u{300}\u{305}\u{315}\u{5ae}b",
"\u{e0}\u{5ae}\u{305}\u{315}b"
);
}
#[test]
@ -80,7 +87,7 @@ fn test_nfkc() {
macro_rules! t {
($input: expr, $expected: expr) => {
assert_eq!($input.nfkc().to_string(), $expected);
}
};
}
t!("abc", "abc");
t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
@ -92,85 +99,10 @@ fn test_nfkc() {
t!("\u{301}a", "\u{301}a");
t!("\u{d4db}", "\u{d4db}");
t!("\u{ac1c}", "\u{ac1c}");
t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
}
#[test]
fn test_official() {
use normalization_tests::NORMALIZATION_TESTS;
macro_rules! normString {
($method: ident, $input: expr) => { $input.$method().collect::<String>() }
}
for test in NORMALIZATION_TESTS {
// these invariants come from the CONFORMANCE section of
// http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
{
let r1 = normString!(nfc, test.source);
let r2 = normString!(nfc, test.nfc);
let r3 = normString!(nfc, test.nfd);
let r4 = normString!(nfc, test.nfkc);
let r5 = normString!(nfc, test.nfkd);
assert_eq!(test.nfc, &r1[..]);
assert_eq!(test.nfc, &r2[..]);
assert_eq!(test.nfc, &r3[..]);
assert_eq!(test.nfkc, &r4[..]);
assert_eq!(test.nfkc, &r5[..]);
}
{
let r1 = normString!(nfd, test.source);
let r2 = normString!(nfd, test.nfc);
let r3 = normString!(nfd, test.nfd);
let r4 = normString!(nfd, test.nfkc);
let r5 = normString!(nfd, test.nfkd);
assert_eq!(test.nfd, &r1[..]);
assert_eq!(test.nfd, &r2[..]);
assert_eq!(test.nfd, &r3[..]);
assert_eq!(test.nfkd, &r4[..]);
assert_eq!(test.nfkd, &r5[..]);
}
{
let r1 = normString!(nfkc, test.source);
let r2 = normString!(nfkc, test.nfc);
let r3 = normString!(nfkc, test.nfd);
let r4 = normString!(nfkc, test.nfkc);
let r5 = normString!(nfkc, test.nfkd);
assert_eq!(test.nfkc, &r1[..]);
assert_eq!(test.nfkc, &r2[..]);
assert_eq!(test.nfkc, &r3[..]);
assert_eq!(test.nfkc, &r4[..]);
assert_eq!(test.nfkc, &r5[..]);
}
{
let r1 = normString!(nfkd, test.source);
let r2 = normString!(nfkd, test.nfc);
let r3 = normString!(nfkd, test.nfd);
let r4 = normString!(nfkd, test.nfkc);
let r5 = normString!(nfkd, test.nfkd);
assert_eq!(test.nfkd, &r1[..]);
assert_eq!(test.nfkd, &r2[..]);
assert_eq!(test.nfkd, &r3[..]);
assert_eq!(test.nfkd, &r4[..]);
assert_eq!(test.nfkd, &r5[..]);
}
}
}
#[test]
fn test_quick_check() {
use normalization_tests::NORMALIZATION_TESTS;
use quick_check;
for test in NORMALIZATION_TESTS {
assert!(quick_check::is_nfc(test.nfc));
assert!(quick_check::is_nfd(test.nfd));
if test.nfc != test.nfd {
assert!(!quick_check::is_nfc(test.nfd));
assert!(!quick_check::is_nfd(test.nfc));
}
}
t!(
"a\u{300}\u{305}\u{315}\u{5ae}b",
"\u{e0}\u{5ae}\u{305}\u{315}b"
);
}
#[test]