From 733ced6d8d6486ab540481bd9e585e9f2353770a Mon Sep 17 00:00:00 2001
From: Henri Sivonen <hsivonen@hsivonen.fi>
Date: Thu, 18 Jan 2018 12:26:21 +0200
Subject: [PATCH] Bug 1431356 - Update encoding_rs to 0.7.2 and simd to 0.2.1.
 r=emk.

MozReview-Commit-ID: Lp3zyF2rLxN

--HG--
extra : rebase_source : 81b515206ca5d28623cbaead16244ef258da2088
---
 .../rust/encoding_rs/.cargo-checksum.json     |    2 +-
 third_party/rust/encoding_rs/CONTRIBUTING.md  |    4 +-
 third_party/rust/encoding_rs/Cargo.toml       |   14 +-
 third_party/rust/encoding_rs/README.md        |   37 +-
 third_party/rust/encoding_rs/src/ascii.rs     |  382 ++-
 third_party/rust/encoding_rs/src/gb18030.rs   |   14 +-
 third_party/rust/encoding_rs/src/lib.rs       |   58 +-
 third_party/rust/encoding_rs/src/mem.rs       | 2873 +++++++++++++++++
 .../rust/encoding_rs/src/simd_funcs.rs        |  159 +-
 third_party/rust/encoding_rs/src/testing.rs   |  129 +-
 third_party/rust/encoding_rs/src/utf_8.rs     |   32 +-
 third_party/rust/simd/.cargo-checksum.json    |    2 +-
 third_party/rust/simd/Cargo.toml              |   43 +-
 third_party/rust/simd/examples/mandelbrot.rs  |    2 +-
 .../rust/simd/examples/matrix-inverse.rs      |    1 +
 .../rust/simd/examples/nbody-nosimd.rs        |    2 +-
 third_party/rust/simd/examples/ops.rs         |    1 +
 third_party/rust/simd/src/aarch64/neon.rs     |    2 +-
 third_party/rust/simd/src/arm/neon.rs         |    2 +-
 third_party/rust/simd/src/common.rs           |    3 +-
 third_party/rust/simd/src/lib.rs              |   17 +-
 third_party/rust/simd/src/sixty_four.rs       |    3 +-
 third_party/rust/simd/src/v256.rs             |   16 +-
 third_party/rust/simd/src/x86/avx.rs          |    2 +-
 third_party/rust/simd/src/x86/avx2.rs         |    2 +-
 third_party/rust/simd/src/x86/sse2.rs         |    2 +-
 toolkit/library/gtest/rust/Cargo.lock         |   14 +-
 toolkit/library/rust/Cargo.lock               |   14 +-
 28 files changed, 3601 insertions(+), 231 deletions(-)
 create mode 100644 third_party/rust/encoding_rs/src/mem.rs

diff --git a/third_party/rust/encoding_rs/.cargo-checksum.json b/third_party/rust/encoding_rs/.cargo-checksum.json
index 29396d9f3a9e..fccc538b34d0 100644
--- a/third_party/rust/encoding_rs/.cargo-checksum.json
+++ b/third_party/rust/encoding_rs/.cargo-checksum.json
@@ -1 +1 @@
-{"files":{".travis.yml":"dc509cc3b8f44fbdf1d806f533c3f005afaf0fd77cd266b38cb69bab3e4ea136","CONTRIBUTING.md":"e4ffa92c979c7e6ca7b676842a708ea05b84181327fcde43dfcd8038b678a057","COPYRIGHT":"20d4fff11cca11529df3f02096fbe8ffe350219cdb07cdedea34e6a762866da5","Cargo.toml":"2bed851f8857df3daf0cef25b3588a0841241624ab326e81cce188a598395352","Ideas.md":"7fbeddb0f8ba7b233673ee705997adc2fddb1636a17fe662532b35ef2810a51d","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"74aa8b6d04c36bb640ee81187a3f24a2fa94e36d4c1d4f2ca164c3784ae87a83","README.md":"cf09a31640f5d556661e2fbe1d07f76046eff94daf6ebb895d14499653b59bde","generate-encoding-data.py":"8a0a5162098d355e4df63532819769fd6626a66a0aa93f2762e315d6147aa0a5","rustfmt.toml":"c01c06dfbdfcf30730535aab911d69068febb921e2faef9571ceeb6a5c2a3eab","src/ascii.rs":"1e9f9a02130933fdba6b7606b47c2308afd6d16df064779245226060211af0ce","src/big5.rs":"780ae537353f899a5772a9e7d062441041276e1eb1506a013e4280c5cda6bb93","src/data.rs":"412c842c698c3ce1cec4a27ab19ca275372ac28940ac49cdf3e0dad71a2c2812","src/euc_jp.rs":"feda0ade5e1c3e4abd7637c59373b977662007990fd164ea7db1acc502ba3534","src/euc_kr.rs":"2699c52055882e34ba4e12d072b8161c635840f3620075ca3f10986aec0e8d3b","src/gb18030.rs":"aa9de27a41715dfb02a3b9161d86e3775f635f625f70d3abaadcd583ee7022c0","src/handles.rs":"c07a3738e43e8aae11108a30d34067c31ddc5d22074b85ef393f00abcc1f4e01","src/iso_2022_jp.rs":"1f780c3ff72f1a867d6c5782135cd01427eca6d74f0dd6cb23c1406b5163af1a","src/lib.rs":"250cabe96d561b38eef9e26141707904b66b612007098287dd2b245240c5a5be","src/macros.rs":"9ab30e7194f61f268cd7d899cabb06ff9ca7717663926fd583b20334f49ac8d3","src/replacement.rs":"782f03f04d110e9a0656262bf4296aa0ab8199e196cb63239c30d9649996caa4","src/shift_jis.rs":"84df4ff58b60e0827d6c0c7049f2cf19033f2b9e25a9186bcfb0bbb05e87b380","src/simd_funcs.rs":"6c5beb75d30c1b3a2e6e9dd86209f9748313ee75f5b43a9d7f5176be310ffabb","src/single_byte.rs":"b3fadb4fa1e66b00efc12b8850b3076580a8cd73c9ed810a19421
fd3ade9bbf1","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8
b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"60f85c6fb63fd4ab62e90dfa005920e79e0e1885795dc13a7a3c1980507925b1","src/utf_16.rs":"1d2c40857c946f6eecf724efc60a196865b4d84a59b08b42fbe4576fa8308fd0","src/utf_8.rs":"34218c7f4faa81883492fdfeb303b7e77710121b06e8342ac62ccb3d6eb16a37","src/utf_8_core.rs":"bbc010dbdfed0f5e7c48a1ab0772eaf2e27711b789bb82f71a678f2240158a65","src/variant.rs":"93dfec2dcfc9fd9711bb55d48177f4a0e9479c7fbd055f08db3853338569da83","src/x_user_defined.rs":"84d054eec249dd676452585f8eb13dc851095021ed6e1f8c79e952c6d81751df"},"package":"f5215aabf22b83153be3ee44dfe3f940214541b2ce13d419c55e7a115c8c51a9"}
\ No newline at end of file
+{"files":{".travis.yml":"dc509cc3b8f44fbdf1d806f533c3f005afaf0fd77cd266b38cb69bab3e4ea136","CONTRIBUTING.md":"6dac812ad206dbeb43b32ae01062fb79684fb01f9ee778c1c166852adc77d4c9","COPYRIGHT":"20d4fff11cca11529df3f02096fbe8ffe350219cdb07cdedea34e6a762866da5","Cargo.toml":"114f3399a97af04c9e2f8514448ccac81aac9ce7b333ec1594e733aad0c92e87","Ideas.md":"7fbeddb0f8ba7b233673ee705997adc2fddb1636a17fe662532b35ef2810a51d","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"74aa8b6d04c36bb640ee81187a3f24a2fa94e36d4c1d4f2ca164c3784ae87a83","README.md":"f60b9e7ff6d62f6cd580cbd386a039fda2d7407821da984fbe3cdb9c4a64f5d3","generate-encoding-data.py":"8a0a5162098d355e4df63532819769fd6626a66a0aa93f2762e315d6147aa0a5","rustfmt.toml":"c01c06dfbdfcf30730535aab911d69068febb921e2faef9571ceeb6a5c2a3eab","src/ascii.rs":"0fd4833571df22b0bb98e230c07b4ff733284c5b58b7b21a50f4f68c683ee706","src/big5.rs":"780ae537353f899a5772a9e7d062441041276e1eb1506a013e4280c5cda6bb93","src/data.rs":"412c842c698c3ce1cec4a27ab19ca275372ac28940ac49cdf3e0dad71a2c2812","src/euc_jp.rs":"feda0ade5e1c3e4abd7637c59373b977662007990fd164ea7db1acc502ba3534","src/euc_kr.rs":"2699c52055882e34ba4e12d072b8161c635840f3620075ca3f10986aec0e8d3b","src/gb18030.rs":"6a4d5ff9a89cdf1d89de78cd309f01385435dd9a4ffee182e13df2675cf57600","src/handles.rs":"c07a3738e43e8aae11108a30d34067c31ddc5d22074b85ef393f00abcc1f4e01","src/iso_2022_jp.rs":"1f780c3ff72f1a867d6c5782135cd01427eca6d74f0dd6cb23c1406b5163af1a","src/lib.rs":"b53cfe7009dcba83724ac2affa1f3fdd675451a33742ceb9f030eb83e702305f","src/macros.rs":"9ab30e7194f61f268cd7d899cabb06ff9ca7717663926fd583b20334f49ac8d3","src/mem.rs":"326003897f0efefa257210f4e698a2a039e7e9d2fe16e0fc9341b51a68ce1dff","src/replacement.rs":"782f03f04d110e9a0656262bf4296aa0ab8199e196cb63239c30d9649996caa4","src/shift_jis.rs":"84df4ff58b60e0827d6c0c7049f2cf19033f2b9e25a9186bcfb0bbb05e87b380","src/simd_funcs.rs":"76c4abc881f2dd91f8e936b059152fa4ee5056af0af59356fbf105436ddd6
73f","src/single_byte.rs":"b3fadb4fa1e66b00efc12b8850b3076580a8cd73c9ed810a19421fd3ade9bbf1","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857",
"src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"16da398fdab694283d24556932ff7fce893e22cf66a180795a830541f4ddd94b","src/utf_16.rs":"1d2c40857c946f6eecf724efc60a196865b4d84a59b08b42fbe4576fa8308fd0","src/utf_8.rs":"dc7df98c65a23607071b699243aec75a461510ee0617abba289df1ebe781c08b","src/utf_8_core.rs":"bbc010dbdfed0f5e7c48a1ab0772eaf2e27711b789bb82f71a678f2240158a65","src/variant.rs":"93dfec2dcfc9fd9711bb55d48177f4a0e9479c7fbd055f08db3853338569da83","src/x_user_defined.rs":"84d054eec249dd676452585f8eb13dc851095021ed6e1f8c79e952c6d81751df"},"package":"98fd0f24d1fb71a4a6b9330c8ca04cbd4e7cc5d846b54ca74ff376bc7c9f798d"}
\ No newline at end of file
diff --git a/third_party/rust/encoding_rs/CONTRIBUTING.md b/third_party/rust/encoding_rs/CONTRIBUTING.md
index 62f808646ab5..f8232f7703b8 100644
--- a/third_party/rust/encoding_rs/CONTRIBUTING.md
+++ b/third_party/rust/encoding_rs/CONTRIBUTING.md
@@ -37,8 +37,8 @@ rustc.
 
 ## rustfmt
 
-Please install [`rustfmt`](https://github.com/rust-lang-nursery/rustfmt) and
-run `cargo fmt` before creating a pull request.
+The `rustfmt` version used for this code is 0.8.4. Please either use that
+version or avoid using `rustfmt` (so as not to reformat all the code).
 
 ## Unit tests
 
diff --git a/third_party/rust/encoding_rs/Cargo.toml b/third_party/rust/encoding_rs/Cargo.toml
index 4790409c1b86..c73dc02ee759 100644
--- a/third_party/rust/encoding_rs/Cargo.toml
+++ b/third_party/rust/encoding_rs/Cargo.toml
@@ -12,7 +12,7 @@
 
 [package]
 name = "encoding_rs"
-version = "0.7.1"
+version = "0.7.2"
 authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
 description = "A Gecko-oriented implementation of the Encoding Standard"
 homepage = "https://docs.rs/encoding_rs/"
@@ -24,6 +24,9 @@ license = "MIT/Apache-2.0"
 repository = "https://github.com/hsivonen/encoding_rs"
 [profile.release]
 lto = true
+[dependencies.cfg-if]
+version = "0.1.0"
+
 [dependencies.serde]
 version = "1.0"
 optional = true
@@ -31,20 +34,17 @@ optional = true
 [dependencies.simd]
 version = "0.2.0"
 optional = true
+[dev-dependencies.bincode]
+version = "0.8"
 
-[dependencies.cfg-if]
-version = "0.1.0"
 [dev-dependencies.serde_derive]
 version = "1.0"
 
 [dev-dependencies.serde_json]
 version = "1.0"
 
-[dev-dependencies.bincode]
-version = "0.8"
-
 [features]
-simd-accel = ["simd"]
 no-static-ideograph-encoder-tables = []
+simd-accel = ["simd"]
 [badges.travis-ci]
 repository = "hsivonen/encoding_rs"
diff --git a/third_party/rust/encoding_rs/README.md b/third_party/rust/encoding_rs/README.md
index a3398bb7a7f0..3e5d4e70fcd0 100644
--- a/third_party/rust/encoding_rs/README.md
+++ b/third_party/rust/encoding_rs/README.md
@@ -9,6 +9,11 @@ encoding_rs an implementation of the (non-JavaScript parts of) the
 [Encoding Standard](https://encoding.spec.whatwg.org/) written in Rust and
 used in Gecko (starting with Firefox 56).
 
+Additionally, the `mem` module provides various operations for dealing with
+in-RAM text (as opposed to data that's coming from or going to an IO boundary).
+The `mem` module is a module instead of a separate crate due to internal
+implementation detail efficiencies.
+
 ## Functionality
 
 Due to the Gecko use case, encoding_rs supports decoding to and encoding from
@@ -43,6 +48,26 @@ Specifically, encoding_rs does the following:
   workloads than the standard library; hopefully will get upstreamed some
   day) and ASCII.
 
+Additionally, `encoding_rs::mem` does the following:
+
+* Checks if a byte buffer contains only ASCII.
+* Checks if a potentially-invalid UTF-16 buffer contains only Basic Latin (ASCII).
+* Checks if a valid UTF-8, potentially-invalid UTF-8 or potentially-invalid UTF-16
+  buffer contains only Latin1 code points (below U+0100).
+* Checks if a valid UTF-8, potentially-invalid UTF-8 or potentially-invalid UTF-16
+  buffer or a code point or a UTF-16 code unit can trigger right-to-left behavior
+  (suitable for checking if the Unicode Bidirectional Algorithm can be optimized
+  out).
+* Combined versions of the above two checks.
+* Converts valid UTF-8, potentially-invalid UTF-8 and Latin1 to UTF-16.
+* Converts potentially-invalid UTF-16 and Latin1 to UTF-8.
+* Converts UTF-8 and UTF-16 to Latin1 (if in range).
+* Finds the first invalid code unit in a buffer of potentially-invalid UTF-16.
+* Makes a mutable buffer of potentially-invalid UTF-16 contain valid UTF-16.
+* Copies ASCII from one buffer to another up to the first non-ASCII byte.
+* Converts ASCII to UTF-16 up to the first non-ASCII byte.
+* Converts UTF-16 to ASCII up to the first non-Basic Latin code unit.
+
 ## Licensing
 
 Please see the file named
@@ -63,6 +88,8 @@ using the C++ standard library and [GSL](https://github.com/Microsoft/GSL/) type
 For the Gecko context, there's a
 [C++ wrapper using the MFBT/XPCOM types](https://searchfox.org/mozilla-central/source/intl/Encoding.h#100).
 
+These bindings do not cover the `mem` module.
+
 ## Sample programs
 
 * [Rust](https://github.com/hsivonen/recode_rs)
@@ -133,9 +160,9 @@ decode-optimized tables. With realistic work loads, this seemed fast enough
 not to be user-visibly slow on Raspberry Pi 3 (which stood in for a phone
 for testing) in the Web-exposed encoder use cases.
 
-A framework for measuring performance is [available separately][1].
+A framework for measuring performance is [available separately][2].
 
-[1]: https://github.com/hsivonen/encoding_bench/
+[2]: https://github.com/hsivonen/encoding_bench/
 
 ## Rust Version Compatibility
 
@@ -193,6 +220,12 @@ used in Firefox.
 
 ## Release Notes
 
+### 0.7.2
+
+* Add the `mem` module.
+* Refactor SIMD code which can affect performance outside the `mem`
+  module.
+
 ### 0.7.1
 
 * When encoding from invalid UTF-16, correctly handle U+DC00 followed by
diff --git a/third_party/rust/encoding_rs/src/ascii.rs b/third_party/rust/encoding_rs/src/ascii.rs
index 2c89716f96a0..cbbdc70f927f 100644
--- a/third_party/rust/encoding_rs/src/ascii.rs
+++ b/third_party/rust/encoding_rs/src/ascii.rs
@@ -24,6 +24,14 @@
 #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"))))]
 use simd_funcs::*;
 
+// `as` truncates, so works on 32-bit, too.
+#[allow(dead_code)]
+pub const ASCII_MASK: usize = 0x80808080_80808080u64 as usize;
+
+// `as` truncates, so works on 32-bit, too.
+#[allow(dead_code)]
+pub const BASIC_LATIN_MASK: usize = 0xFF80FF80_FF80FF80u64 as usize;
+
 #[allow(unused_macros)]
 macro_rules! ascii_naive {
     ($name:ident,
@@ -212,6 +220,62 @@ macro_rules! basic_latin_alu {
     });
 }
 
+#[allow(unused_macros)]
+macro_rules! latin1_alu {
+    ($name:ident,
+     $src_unit:ty,
+     $dst_unit:ty,
+     $stride_fn:ident) => (
+    #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]
+    #[inline(always)]
+    pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) {
+        let mut offset = 0usize;
+        // This loop is only broken out of as a `goto` forward
+        loop {
+            let mut until_alignment = {
+                if ::std::mem::size_of::<$src_unit>() < ::std::mem::size_of::<$dst_unit>() {
+                    // unpack
+                    let src_until_alignment = (ALIGNMENT - ((src as usize) & ALIGNMENT_MASK)) & ALIGNMENT_MASK;
+                    if (dst.offset(src_until_alignment as isize) as usize) & ALIGNMENT_MASK != 0 {
+                        break;
+                    }
+                    src_until_alignment
+                } else {
+                    // pack
+                    let dst_until_alignment = (ALIGNMENT - ((dst as usize) & ALIGNMENT_MASK)) & ALIGNMENT_MASK;
+                    if (src.offset(dst_until_alignment as isize) as usize) & ALIGNMENT_MASK != 0 {
+                        break;
+                    }
+                    dst_until_alignment
+                }
+            };
+            if until_alignment + STRIDE_SIZE <= len {
+                while until_alignment != 0 {
+                    let code_unit = *(src.offset(offset as isize));
+                    *(dst.offset(offset as isize)) = code_unit as $dst_unit;
+                    offset += 1;
+                    until_alignment -= 1;
+                }
+                let len_minus_stride = len - STRIDE_SIZE;
+                loop {
+                    $stride_fn(src.offset(offset as isize) as *const usize,
+                               dst.offset(offset as isize) as *mut usize);
+                    offset += STRIDE_SIZE;
+                    if offset > len_minus_stride {
+                        break;
+                    }
+                }
+            }
+            break;
+        }
+        while offset < len {
+            let code_unit = *(src.offset(offset as isize));
+            *(dst.offset(offset as isize)) = code_unit as $dst_unit;
+            offset += 1;
+        }
+    });
+}
+
 #[allow(unused_macros)]
 macro_rules! ascii_simd_check_align {
     ($name:ident,
@@ -294,6 +358,89 @@ macro_rules! ascii_simd_check_align {
     });
 }
 
+#[allow(unused_macros)]
+macro_rules! latin1_simd_check_align {
+    ($name:ident,
+     $src_unit:ty,
+     $dst_unit:ty,
+     $stride_both_aligned:ident,
+     $stride_src_aligned:ident,
+     $stride_dst_aligned:ident,
+     $stride_neither_aligned:ident) => (
+    #[inline(always)]
+    pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) {
+        let mut offset = 0usize;
+        if STRIDE_SIZE <= len {
+            let len_minus_stride = len - STRIDE_SIZE;
+            // XXX Should we first process one stride unconditionally as unaligned to
+            // avoid the cost of the branchiness below if the first stride fails anyway?
+            // XXX Should we just use unaligned SSE2 access unconditionally? It seems that
+            // on Haswell, it would make sense to just use unaligned and not bother
+            // checking. Need to benchmark older architectures before deciding.
+            let dst_masked = (dst as usize) & ALIGNMENT_MASK;
+            if ((src as usize) & ALIGNMENT_MASK) == 0 {
+                if dst_masked == 0 {
+                    loop {
+                        $stride_both_aligned(src.offset(offset as isize),
+                                             dst.offset(offset as isize));
+                        offset += STRIDE_SIZE;
+                        if offset > len_minus_stride {
+                            break;
+                        }
+                    }
+                } else {
+                    loop {
+                        $stride_src_aligned(src.offset(offset as isize),
+                                            dst.offset(offset as isize));
+                        offset += STRIDE_SIZE;
+                        if offset > len_minus_stride {
+                            break;
+                        }
+                    }
+                }
+            } else {
+                if dst_masked == 0 {
+                    loop {
+                        $stride_dst_aligned(src.offset(offset as isize),
+                                            dst.offset(offset as isize));
+                        offset += STRIDE_SIZE;
+                        if offset > len_minus_stride {
+                            break;
+                        }
+                    }
+                } else {
+                    loop {
+                        $stride_neither_aligned(src.offset(offset as isize),
+                                                dst.offset(offset as isize));
+                        offset += STRIDE_SIZE;
+                        if offset > len_minus_stride {
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+        while offset < len {
+            let code_unit = *(src.offset(offset as isize));
+            // On x86_64, this loop autovectorizes but in the pack
+            // case there are instructions whose purpose is to make sure
+            // each u16 in the vector is truncated before packing. However,
+            // since we don't care about saturating behavior of SSE2 packing
+            // when the input isn't Latin1, those instructions are useless.
+            // Unfortunately, using the `assume` intrinsic to lie to the
+            // optimizer doesn't make LLVM omit the truncation that we
+            // don't need. Possibly this loop could be manually optimized
+            // to do the sort of thing that LLVM does but without the
+            // ANDing the read vectors of u16 with a constant that discards
+            // the high half of each u16. As far as I can tell, the
+            // optimization assumes that doing a SIMD read past the end of
+            // the array is OK.
+            *(dst.offset(offset as isize)) = code_unit as $dst_unit;
+            offset += 1;
+        }
+    });
+}
+
 #[allow(unused_macros)]
 macro_rules! ascii_simd_unalign {
     ($name:ident,
@@ -328,6 +475,34 @@ macro_rules! ascii_simd_unalign {
     });
 }
 
+#[allow(unused_macros)]
+macro_rules! latin1_simd_unalign {
+    ($name:ident,
+     $src_unit:ty,
+     $dst_unit:ty,
+     $stride_neither_aligned:ident) => (
+    #[inline(always)]
+    pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) {
+        let mut offset = 0usize;
+        if STRIDE_SIZE <= len {
+            let len_minus_stride = len - STRIDE_SIZE;
+            loop {
+                $stride_neither_aligned(src.offset(offset as isize),
+                                        dst.offset(offset as isize));
+                offset += STRIDE_SIZE;
+                if offset > len_minus_stride {
+                    break;
+                }
+            }
+        }
+        while offset < len {
+            let code_unit = *(src.offset(offset as isize));
+            *(dst.offset(offset as isize)) = code_unit as $dst_unit;
+            offset += 1;
+        }
+    });
+}
+
 #[allow(unused_macros)]
 macro_rules! ascii_to_ascii_simd_stride {
     ($name:ident,
@@ -336,7 +511,7 @@ macro_rules! ascii_to_ascii_simd_stride {
     #[inline(always)]
     pub unsafe fn $name(src: *const u8, dst: *mut u8) -> bool {
         let simd = $load(src);
-        if !is_ascii(simd) {
+        if !simd_is_ascii(simd) {
             return false;
         }
         $store(dst, simd);
@@ -352,7 +527,7 @@ macro_rules! ascii_to_basic_latin_simd_stride {
     #[inline(always)]
     pub unsafe fn $name(src: *const u8, dst: *mut u16) -> bool {
         let simd = $load(src);
-        if !is_ascii(simd) {
+        if !simd_is_ascii(simd) {
             return false;
         }
         let (first, second) = simd_unpack(simd);
@@ -362,6 +537,20 @@ macro_rules! ascii_to_basic_latin_simd_stride {
     });
 }
 
+#[allow(unused_macros)]
+macro_rules! unpack_simd_stride {
+    ($name:ident,
+     $load:ident,
+     $store:ident) => (
+    #[inline(always)]
+    pub unsafe fn $name(src: *const u8, dst: *mut u16) {
+        let simd = $load(src);
+        let (first, second) = simd_unpack(simd);
+        $store(dst, first);
+        $store(dst.offset(8), second);
+    });
+}
+
 #[allow(unused_macros)]
 macro_rules! basic_latin_to_ascii_simd_stride {
     ($name:ident,
@@ -371,7 +560,7 @@ macro_rules! basic_latin_to_ascii_simd_stride {
     pub unsafe fn $name(src: *const u16, dst: *mut u8) -> bool {
         let first = $load(src);
         let second = $load(src.offset(8));
-        if is_basic_latin(first | second) {
+        if simd_is_basic_latin(first | second) {
             $store(dst, simd_pack(first, second));
             true
         } else {
@@ -380,23 +569,40 @@ macro_rules! basic_latin_to_ascii_simd_stride {
     });
 }
 
+#[allow(unused_macros)]
+macro_rules! pack_simd_stride {
+    ($name:ident,
+     $load:ident,
+     $store:ident) => (
+    #[inline(always)]
+    pub unsafe fn $name(src: *const u16, dst: *mut u8) {
+        let first = $load(src);
+        let second = $load(src.offset(8));
+        $store(dst, simd_pack(first, second));
+    });
+}
+
 cfg_if! {
     if #[cfg(all(feature = "simd-accel", target_endian = "little", target_arch = "aarch64"))] {
         // SIMD with the same instructions for aligned and unaligned loads and stores
 
         pub const STRIDE_SIZE: usize = 16;
 
-        const ALIGNMENT: usize = 8;
+//        pub const ALIGNMENT: usize = 8;
 
         ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_neither_aligned, load16_unaligned, store16_unaligned);
 
         ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_neither_aligned, load16_unaligned, store8_unaligned);
+        unpack_simd_stride!(unpack_stride_neither_aligned, load16_unaligned, store8_unaligned);
 
         basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_neither_aligned, load8_unaligned, store16_unaligned);
+        pack_simd_stride!(pack_stride_neither_aligned, load8_unaligned, store16_unaligned);
 
         ascii_simd_unalign!(ascii_to_ascii, u8, u8, ascii_to_ascii_stride_neither_aligned);
         ascii_simd_unalign!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_neither_aligned);
         ascii_simd_unalign!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_neither_aligned);
+        latin1_simd_unalign!(unpack_latin1, u8, u16, unpack_stride_neither_aligned);
+        latin1_simd_unalign!(pack_latin1, u16, u8, pack_stride_neither_aligned);
     } else if #[cfg(all(feature = "simd-accel", target_feature = "sse2"))] {
         // SIMD with different instructions for aligned and unaligned loads and stores.
         //
@@ -406,7 +612,7 @@ cfg_if! {
 
         pub const STRIDE_SIZE: usize = 16;
 
-        const ALIGNMENT_MASK: usize = 15;
+        pub const ALIGNMENT_MASK: usize = 15;
 
         ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_both_aligned, load16_aligned, store16_aligned);
         ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_src_aligned, load16_aligned, store16_unaligned);
@@ -418,31 +624,37 @@ cfg_if! {
         ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_dst_aligned, load16_unaligned, store8_aligned);
         ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_neither_aligned, load16_unaligned, store8_unaligned);
 
+        unpack_simd_stride!(unpack_stride_both_aligned, load16_aligned, store8_aligned);
+        unpack_simd_stride!(unpack_stride_src_aligned, load16_aligned, store8_unaligned);
+        unpack_simd_stride!(unpack_stride_dst_aligned, load16_unaligned, store8_aligned);
+        unpack_simd_stride!(unpack_stride_neither_aligned, load16_unaligned, store8_unaligned);
+
         basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_both_aligned, load8_aligned, store16_aligned);
         basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_src_aligned, load8_aligned, store16_unaligned);
         basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_dst_aligned, load8_unaligned, store16_aligned);
         basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_neither_aligned, load8_unaligned, store16_unaligned);
 
+        pack_simd_stride!(pack_stride_both_aligned, load8_aligned, store16_aligned);
+        pack_simd_stride!(pack_stride_src_aligned, load8_aligned, store16_unaligned);
+        pack_simd_stride!(pack_stride_dst_aligned, load8_unaligned, store16_aligned);
+        pack_simd_stride!(pack_stride_neither_aligned, load8_unaligned, store16_unaligned);
+
         ascii_simd_check_align!(ascii_to_ascii, u8, u8, ascii_to_ascii_stride_both_aligned, ascii_to_ascii_stride_src_aligned, ascii_to_ascii_stride_dst_aligned, ascii_to_ascii_stride_neither_aligned);
         ascii_simd_check_align!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_both_aligned, ascii_to_basic_latin_stride_src_aligned, ascii_to_basic_latin_stride_dst_aligned, ascii_to_basic_latin_stride_neither_aligned);
         ascii_simd_check_align!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_both_aligned, basic_latin_to_ascii_stride_src_aligned, basic_latin_to_ascii_stride_dst_aligned, basic_latin_to_ascii_stride_neither_aligned);
+        latin1_simd_check_align!(unpack_latin1, u8, u16, unpack_stride_both_aligned, unpack_stride_src_aligned, unpack_stride_dst_aligned, unpack_stride_neither_aligned);
+        latin1_simd_check_align!(pack_latin1, u16, u8, pack_stride_both_aligned, pack_stride_src_aligned, pack_stride_dst_aligned, pack_stride_neither_aligned);
     } else if #[cfg(all(target_endian = "little", target_pointer_width = "64"))] {
         // Aligned ALU word, little-endian, 64-bit
 
         pub const STRIDE_SIZE: usize = 16;
 
-        const ALIGNMENT: usize = 8;
+        pub const ALIGNMENT: usize = 8;
 
-        const ALIGNMENT_MASK: usize = 7;
+        pub const ALIGNMENT_MASK: usize = 7;
 
         #[inline(always)]
-        unsafe fn ascii_to_basic_latin_stride_little_64(src: *const usize, dst: *mut usize) -> bool {
-            let word = *src;
-            let second_word = *(src.offset(1));
-            // Check if the words contains non-ASCII
-            if (word & ASCII_MASK) | (second_word & ASCII_MASK) != 0 {
-                return false;
-            }
+        unsafe fn unpack_alu(word: usize, second_word: usize, dst: *mut usize) {
             let first = ((0x00000000_FF000000usize & word) << 24) |
                         ((0x00000000_00FF0000usize & word) << 16) |
                         ((0x00000000_0000FF00usize & word) << 8) |
@@ -463,18 +675,10 @@ cfg_if! {
             *(dst.offset(1)) = second;
             *(dst.offset(2)) = third;
             *(dst.offset(3)) = fourth;
-            true
         }
 
         #[inline(always)]
-        unsafe fn basic_latin_to_ascii_stride_little_64(src: *const usize, dst: *mut usize) -> bool {
-            let first = *src;
-            let second = *(src.offset(1));
-            let third = *(src.offset(2));
-            let fourth = *(src.offset(3));
-            if (first & BASIC_LATIN_MASK) | (second & BASIC_LATIN_MASK) | (third & BASIC_LATIN_MASK) | (fourth & BASIC_LATIN_MASK) != 0 {
-                return false;
-            }
+        unsafe fn pack_alu(first: usize, second: usize, third: usize, fourth: usize, dst: *mut usize) {
             let word = ((0x00FF0000_00000000usize & second) << 8) |
                        ((0x000000FF_00000000usize & second) << 16) |
                        ((0x00000000_00FF0000usize & second) << 24) |
@@ -493,28 +697,18 @@ cfg_if! {
                               (0x00000000_000000FFusize & third);
             *dst = word;
             *(dst.offset(1)) = second_word;
-            true
         }
-
-        basic_latin_alu!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_little_64);
-        basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_little_64);
     } else if #[cfg(all(target_endian = "little", target_pointer_width = "32"))] {
         // Aligned ALU word, little-endian, 32-bit
 
         pub const STRIDE_SIZE: usize = 8;
 
-        const ALIGNMENT: usize = 4;
+        pub const ALIGNMENT: usize = 4;
 
-        const ALIGNMENT_MASK: usize = 3;
+        pub const ALIGNMENT_MASK: usize = 3;
 
         #[inline(always)]
-        unsafe fn ascii_to_basic_latin_stride_little_32(src: *const usize, dst: *mut usize) -> bool {
-            let word = *src;
-            let second_word = *(src.offset(1));
-            // Check if the words contains non-ASCII
-            if (word & ASCII_MASK) | (second_word & ASCII_MASK) != 0 {
-                return false;
-            }
+        unsafe fn unpack_alu(word: usize, second_word: usize, dst: *mut usize) {
             let first = ((0x0000FF00usize & word) << 8) |
                         (0x000000FFusize & word);
             let second = ((0xFF000000usize & word) >> 8) |
@@ -527,18 +721,10 @@ cfg_if! {
             *(dst.offset(1)) = second;
             *(dst.offset(2)) = third;
             *(dst.offset(3)) = fourth;
-            return true;
         }
 
         #[inline(always)]
-        unsafe fn basic_latin_to_ascii_stride_little_32(src: *const usize, dst: *mut usize) -> bool {
-            let first = *src;
-            let second = *(src.offset(1));
-            let third = *(src.offset(2));
-            let fourth = *(src.offset(3));
-            if (first & BASIC_LATIN_MASK) | (second & BASIC_LATIN_MASK) | (third & BASIC_LATIN_MASK) | (fourth & BASIC_LATIN_MASK) != 0 {
-                return false;
-            }
+        unsafe fn pack_alu(first: usize, second: usize, third: usize, fourth: usize, dst: *mut usize) {
             let word = ((0x00FF0000usize & second) << 8) |
                        ((0x000000FFusize & second) << 16) |
                        ((0x00FF0000usize & first) >> 8) |
@@ -549,28 +735,18 @@ cfg_if! {
                               (0x000000FFusize & third);
             *dst = word;
             *(dst.offset(1)) = second_word;
-            return true;
         }
-
-        basic_latin_alu!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_little_32);
-        basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_little_32);
     } else if #[cfg(all(target_endian = "big", target_pointer_width = "64"))] {
         // Aligned ALU word, big-endian, 64-bit
 
         pub const STRIDE_SIZE: usize = 16;
 
-        const ALIGNMENT: usize = 8;
+        pub const ALIGNMENT: usize = 8;
 
-        const ALIGNMENT_MASK: usize = 7;
+        pub const ALIGNMENT_MASK: usize = 7;
 
         #[inline(always)]
-        unsafe fn ascii_to_basic_latin_stride_big_64(src: *const usize, dst: *mut usize) -> bool {
-            let word = *src;
-            let second_word = *(src.offset(1));
-            // Check if the words contains non-ASCII
-            if (word & ASCII_MASK) | (second_word & ASCII_MASK) != 0 {
-                return false;
-            }
+        unsafe fn unpack_alu(word: usize, second_word: usize, dst: *mut usize) {
             let first = ((0xFF000000_00000000usize & word) >> 8) |
                          ((0x00FF0000_00000000usize & word) >> 16) |
                          ((0x0000FF00_00000000usize & word) >> 24) |
@@ -591,18 +767,10 @@ cfg_if! {
             *(dst.offset(1)) = second;
             *(dst.offset(2)) = third;
             *(dst.offset(3)) = fourth;
-            return true;
         }
 
         #[inline(always)]
-        unsafe fn basic_latin_to_ascii_stride_big_64(src: *const usize, dst: *mut usize) -> bool {
-            let first = *src;
-            let second = *(src.offset(1));
-            let third = *(src.offset(2));
-            let fourth = *(src.offset(3));
-            if (first & BASIC_LATIN_MASK) | (second & BASIC_LATIN_MASK) | (third & BASIC_LATIN_MASK) | (fourth & BASIC_LATIN_MASK) != 0 {
-                return false;
-            }
+        unsafe fn pack_alu(first: usize, second: usize, third: usize, fourth: usize, dst: *mut usize) {
             let word = ((0x00FF0000_00000000usize & first) << 8) |
                        ((0x000000FF_00000000usize & first) << 16) |
                        ((0x00000000_00FF0000usize & first) << 24) |
@@ -621,28 +789,18 @@ cfg_if! {
                               (0x00000000_000000FFusize &  fourth);
             *dst = word;
             *(dst.offset(1)) = second_word;
-            return true;
         }
-
-        basic_latin_alu!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_big_64);
-        basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_big_64);
     } else if #[cfg(all(target_endian = "big", target_pointer_width = "32"))] {
         // Aligned ALU word, big-endian, 32-bit
 
         pub const STRIDE_SIZE: usize = 8;
 
-        const ALIGNMENT: usize = 4;
+        pub const ALIGNMENT: usize = 4;
 
-        const ALIGNMENT_MASK: usize = 3;
+        pub const ALIGNMENT_MASK: usize = 3;
 
         #[inline(always)]
-        unsafe fn ascii_to_basic_latin_stride_big_32(src: *const usize, dst: *mut usize) -> bool {
-            let word = *src;
-            let second_word = *(src.offset(1));
-            // Check if the words contains non-ASCII
-            if (word & ASCII_MASK) | (second_word & ASCII_MASK) != 0 {
-                return false;
-            }
+        unsafe fn unpack_alu(word: usize, second_word: usize, dst: *mut usize) {
             let first = ((0xFF000000usize & word) >> 8) |
                          ((0x00FF0000usize & word) >> 16);
             let second = ((0x0000FF00usize & word) << 8) |
@@ -655,18 +813,10 @@ cfg_if! {
             *(dst.offset(1)) = second;
             *(dst.offset(2)) = third;
             *(dst.offset(3)) = fourth;
-            return true;
         }
 
         #[inline(always)]
-        unsafe fn basic_latin_to_ascii_stride_big_32(src: *const usize, dst: *mut usize) -> bool {
-            let first = *src;
-            let second = *(src.offset(1));
-            let third = *(src.offset(2));
-            let fourth = *(src.offset(3));
-            if (first & BASIC_LATIN_MASK) | (second & BASIC_LATIN_MASK) | (third & BASIC_LATIN_MASK) | (fourth & BASIC_LATIN_MASK) != 0 {
-                return false;
-            }
+        unsafe fn pack_alu(first: usize, second: usize, third: usize, fourth: usize, dst: *mut usize) {
             let word = ((0x00FF0000usize & first) << 8) |
                        ((0x000000FFusize & first) << 16) |
                        ((0x00FF0000usize & second) >> 8) |
@@ -677,11 +827,7 @@ cfg_if! {
                               (0x000000FFusize & fourth);
             *dst = word;
             *(dst.offset(1)) = second_word;
-            return true;
         }
-
-        basic_latin_alu!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_big_32);
-        basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_big_32);
     } else {
         ascii_naive!(ascii_to_ascii, u8, u8);
         ascii_naive!(ascii_to_basic_latin, u8, u16);
@@ -716,7 +862,7 @@ cfg_if! {
                 let len_minus_stride = len - STRIDE_SIZE;
                 loop {
                     let simd = unsafe { load16_unaligned(src.offset(offset as isize)) };
-                    if !is_ascii(simd) {
+                    if !simd_is_ascii(simd) {
                         break;
                     }
                     offset += STRIDE_SIZE;
@@ -787,9 +933,51 @@ cfg_if! {
             None
         }
     } else {
-        // `as` truncates, so works on 32-bit, too.
-        const ASCII_MASK: usize = 0x80808080_80808080u64 as usize;
-        const BASIC_LATIN_MASK: usize = 0xFF80FF80_FF80FF80u64 as usize;
+        #[inline(always)]
+        unsafe fn unpack_latin1_stride_alu(src: *const usize, dst: *mut usize) { // One stride, no range check on the input.
+            let word = *src; // Caller must make `src` valid for reading two `usize` words.
+            let second_word = *(src.offset(1));
+            unpack_alu(word, second_word, dst); // The cfg-selected `unpack_alu` performs the stores through `dst`.
+        }
+
+        #[inline(always)]
+        unsafe fn pack_latin1_stride_alu(src: *const usize, dst: *mut usize) { // One stride, no range check on the input.
+            let first = *src; // Caller must make `src` valid for reading four `usize` words.
+            let second = *(src.offset(1));
+            let third = *(src.offset(2));
+            let fourth = *(src.offset(3));
+            pack_alu(first, second, third, fourth, dst); // The cfg-selected `pack_alu` performs the stores through `dst`.
+        }
+
+        #[inline(always)]
+        unsafe fn ascii_to_basic_latin_stride_alu(src: *const usize, dst: *mut usize) -> bool { // Returns `true` iff the stride was converted.
+            let word = *src; // Caller must make `src` valid for reading two `usize` words.
+            let second_word = *(src.offset(1));
+            // Reject the stride, writing nothing, if either word has a bit set under `ASCII_MASK`
+            if (word & ASCII_MASK) | (second_word & ASCII_MASK) != 0 {
+                return false;
+            }
+            unpack_alu(word, second_word, dst); // Same unpacking as the unchecked Latin1 path.
+            true
+        }
+
+        #[inline(always)]
+        unsafe fn basic_latin_to_ascii_stride_alu(src: *const usize, dst: *mut usize) -> bool { // Returns `true` iff the stride was converted.
+            let first = *src; // Caller must make `src` valid for reading four `usize` words.
+            let second = *(src.offset(1));
+            let third = *(src.offset(2));
+            let fourth = *(src.offset(3));
+            if (first & BASIC_LATIN_MASK) | (second & BASIC_LATIN_MASK) | (third & BASIC_LATIN_MASK) | (fourth & BASIC_LATIN_MASK) != 0 {
+                return false; // Some word has a bit set under `BASIC_LATIN_MASK`; nothing has been written.
+            }
+            pack_alu(first, second, third, fourth, dst); // Same packing as the unchecked Latin1 path.
+            true
+        }
+
+        basic_latin_alu!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_alu);
+        basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_alu);
+        latin1_alu!(unpack_latin1, u8, u16, unpack_latin1_stride_alu);
+        latin1_alu!(pack_latin1, u16, u8, pack_latin1_stride_alu);
 
         #[inline(always)]
         unsafe fn ascii_to_ascii_stride(src: *const usize, dst: *mut usize) -> Option<usize> {
diff --git a/third_party/rust/encoding_rs/src/gb18030.rs b/third_party/rust/encoding_rs/src/gb18030.rs
index f4198e1c298e..2835263d3260 100644
--- a/third_party/rust/encoding_rs/src/gb18030.rs
+++ b/third_party/rust/encoding_rs/src/gb18030.rs
@@ -394,9 +394,7 @@ fn gbk_encode_non_unified(bmp: u16) -> Option<(usize, usize)> {
     // PUA between Hanzi Levels
     let bmp_minus_pua_between_hanzi = bmp.wrapping_sub(0xE810);
     if bmp_minus_pua_between_hanzi < 5 {
-        return Some(
-            (0x81 + 0x56, 0xFF - 5 + bmp_minus_pua_between_hanzi as usize),
-        );
+        return Some((0x81 + 0x56, 0xFF - 5 + bmp_minus_pua_between_hanzi as usize));
     }
     None
 }
@@ -595,6 +593,14 @@ mod tests {
 
         // 0xFF
         decode_gb18030(b"\xFF\x40", "\u{FFFD}\u{0040}");
+        decode_gb18030(b"\xE3\xFF\x9A\x33", "\u{FFFD}\u{FFFD}"); // not \u{FFFD}\u{FFFD}\u{0033} !
+        decode_gb18030(b"\xFF\x32\x9A\x33", "\u{FFFD}\u{0032}\u{FFFD}"); // not \u{FFFD}\u{0032}\u{FFFD}\u{0033} !
+        decode_gb18030(b"\xFF\x40\x00", "\u{FFFD}\u{0040}\u{0000}");
+        decode_gb18030(b"\xE3\xFF\x9A\x33\x00", "\u{FFFD}\u{FFFD}\u{0033}\u{0000}");
+        decode_gb18030(
+            b"\xFF\x32\x9A\x33\x00",
+            "\u{FFFD}\u{0032}\u{FFFD}\u{0033}\u{0000}",
+        );
 
         // Four bytes
         decode_gb18030(b"\x81\x30\x81\x30", "\u{0080}");
@@ -605,7 +611,7 @@ mod tests {
         decode_gb18030(b"\xE3\x32\x9A\x36\x81\x30", "\u{FFFD}\u{FFFD}");
         decode_gb18030(b"\xE3\x32\x9A\x36\x81\x40", "\u{FFFD}\u{4E02}");
         decode_gb18030(b"\xE3\x32\x9A", "\u{FFFD}"); // not \u{FFFD}\u{0032}\u{FFFD} !
-
+        decode_gb18030(b"\xE3\x32\x9A\x00", "\u{FFFD}\u{0032}\u{FFFD}\u{0000}");
     }
 
     #[test]
diff --git a/third_party/rust/encoding_rs/src/lib.rs b/third_party/rust/encoding_rs/src/lib.rs
index 2886d68da35d..8d5b5723c55c 100644
--- a/third_party/rust/encoding_rs/src/lib.rs
+++ b/third_party/rust/encoding_rs/src/lib.rs
@@ -8,7 +8,7 @@
 // except according to those terms.
 
 #![cfg_attr(feature = "cargo-clippy", allow(doc_markdown, inline_always, new_ret_no_self))]
-#![doc(html_root_url = "https://docs.rs/encoding_rs/0.7.1")]
+#![doc(html_root_url = "https://docs.rs/encoding_rs/0.7.2")]
 
 //! encoding_rs is a Gecko-oriented Free Software / Open Source implementation
 //! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust.
@@ -17,6 +17,10 @@
 //! streamability goals are browser-oriented, and that FFI-friendliness is a
 //! goal.
 //!
+//! Additionally, the `mem` module provides functions that are useful for
+//! applications that need to be able to deal with legacy in-memory
+//! representations of Unicode.
+//!
 //! # Availability
 //!
 //! The code is available under the
@@ -491,7 +495,7 @@
 //! </tbody>
 //! </table>
 
-#![cfg_attr(feature = "simd-accel", feature(cfg_target_feature, platform_intrinsics))]
+#![cfg_attr(feature = "simd-accel", feature(cfg_target_feature, platform_intrinsics, core_intrinsics))]
 
 #[macro_use]
 extern crate cfg_if;
@@ -539,6 +543,8 @@ mod handles;
 mod data;
 mod variant;
 
+pub mod mem;
+
 use variant::*;
 use utf_8::utf8_valid_up_to;
 use ascii::ascii_valid_up_to;
@@ -2030,20 +2036,20 @@ static ENCODINGS_IN_LABEL_SORT: [&'static Encoding; 219] = [&WINDOWS_1252_INIT,
 /// # Streaming vs. Non-Streaming
 ///
 /// When you have the entire input in a single buffer, you can use the
-/// methods [`decode()`][1], [`decode_with_bom_removal()`][2],
-/// [`decode_without_bom_handling()`][3],
-/// [`decode_without_bom_handling_and_without_replacement()`][4] and
-/// [`encode()`][5]. (These methods are available to Rust callers only and are
+/// methods [`decode()`][3], [`decode_with_bom_removal()`][4],
+/// [`decode_without_bom_handling()`][5],
+/// [`decode_without_bom_handling_and_without_replacement()`][6] and
+/// [`encode()`][7]. (These methods are available to Rust callers only and are
 /// not available in the C API.) Unlike the rest of the API available to Rust,
 /// these methods perform heap allocations. You should the `Decoder` and
 /// `Encoder` objects when your input is split into multiple buffers or when
 /// you want to control the allocation of the output buffers.
 ///
-/// [1]: #method.decode
-/// [2]: #method.decode_with_bom_removal
-/// [3]: #method.decode_without_bom_handling
-/// [4]: #method.decode_without_bom_handling_and_without_replacement
-/// [5]: #method.encode
+/// [3]: #method.decode
+/// [4]: #method.decode_with_bom_removal
+/// [5]: #method.decode_without_bom_handling
+/// [6]: #method.decode_without_bom_handling_and_without_replacement
+/// [7]: #method.encode
 ///
 /// # Instances
 ///
@@ -2222,6 +2228,7 @@ impl Encoding {
     /// unsafe fallback for labels that `for_label()` maps to `Some(REPLACEMENT)`.
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn for_label_no_replacement(label: &[u8]) -> Option<&'static Encoding> {
         match Encoding::for_label(label) {
             None => None,
@@ -2246,6 +2253,7 @@ impl Encoding {
     /// or UTF-16BE BOM or `None` otherwise.
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn for_bom(buffer: &[u8]) -> Option<(&'static Encoding, usize)> {
         if buffer.starts_with(b"\xEF\xBB\xBF") {
             Some((UTF_8, 3))
@@ -2264,6 +2272,7 @@ impl Encoding {
     /// `document.characterSet` property.
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn name(&'static self) -> &'static str {
         self.name
     }
@@ -2272,6 +2281,7 @@ impl Encoding {
     /// `char`. (Only true if the output encoding is UTF-8.)
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn can_encode_everything(&'static self) -> bool {
         self.output_encoding() == UTF_8
     }
@@ -2280,12 +2290,14 @@ impl Encoding {
     /// U+0000...U+007F and vice versa.
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn is_ascii_compatible(&'static self) -> bool {
         !(self == REPLACEMENT || self == UTF_16BE || self == UTF_16LE || self == ISO_2022_JP)
     }
 
     /// Checks whether the bytes 0x00...0x7F map mostly to the characters
     /// U+0000...U+007F and vice versa.
+    #[inline]
     fn is_potentially_borrowable(&'static self) -> bool {
         !(self == REPLACEMENT || self == UTF_16BE || self == UTF_16LE)
     }
@@ -2294,6 +2306,7 @@ impl Encoding {
     /// UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise.
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn output_encoding(&'static self) -> &'static Encoding {
         if self == REPLACEMENT || self == UTF_16BE || self == UTF_16LE {
             UTF_8
@@ -2336,6 +2349,7 @@ impl Encoding {
     /// `usize`.
     ///
     /// Available to Rust only.
+    #[inline]
     pub fn decode<'a>(&'static self, bytes: &'a [u8]) -> (Cow<'a, str>, &'static Encoding, bool) {
         let (encoding, without_bom) = match Encoding::for_bom(bytes) {
             Some((encoding, bom_length)) => (encoding, &bytes[bom_length..]),
@@ -2378,6 +2392,7 @@ impl Encoding {
     /// `usize`.
     ///
     /// Available to Rust only.
+    #[inline]
     pub fn decode_with_bom_removal<'a>(&'static self, bytes: &'a [u8]) -> (Cow<'a, str>, bool) {
         let without_bom = if self == UTF_8 && bytes.starts_with(b"\xEF\xBB\xBF") {
             &bytes[3..]
@@ -2689,6 +2704,7 @@ impl Encoding {
     /// for UTF-8, UTF-16LE or UTF-16BE instead of this encoding.
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn new_decoder(&'static self) -> Decoder {
         Decoder::new(self, self.new_variant_decoder(), BomHandling::Sniff)
     }
@@ -2702,6 +2718,7 @@ impl Encoding {
     /// encoding.
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn new_decoder_with_bom_removal(&'static self) -> Decoder {
         Decoder::new(self, self.new_variant_decoder(), BomHandling::Remove)
     }
@@ -2717,6 +2734,7 @@ impl Encoding {
     /// instead of this method to cause the BOM to be removed.
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn new_decoder_without_bom_handling(&'static self) -> Decoder {
         Decoder::new(self, self.new_variant_decoder(), BomHandling::Off)
     }
@@ -2724,6 +2742,7 @@ impl Encoding {
     /// Instantiates a new encoder for the output encoding of this encoding.
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn new_encoder(&'static self) -> Encoder {
         let enc = self.output_encoding();
         enc.variant.new_encoder(enc)
@@ -2767,6 +2786,7 @@ impl Encoding {
 }
 
 impl PartialEq for Encoding {
+    #[inline]
     fn eq(&self, other: &Encoding) -> bool {
         (self as *const Encoding) == (other as *const Encoding)
     }
@@ -2775,12 +2795,14 @@ impl PartialEq for Encoding {
 impl Eq for Encoding {}
 
 impl Hash for Encoding {
+    #[inline]
     fn hash<H: Hasher>(&self, state: &mut H) {
         (self as *const Encoding).hash(state);
     }
 }
 
 impl std::fmt::Debug for Encoding {
+    #[inline]
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         write!(f, "Encoding {{ {} }}", self.name)
     }
@@ -2788,6 +2810,7 @@ impl std::fmt::Debug for Encoding {
 
 #[cfg(feature = "serde")]
 impl Serialize for Encoding {
+    #[inline]
     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
         where S: Serializer
     {
@@ -3054,6 +3077,7 @@ impl Decoder {
     /// of the decoder.
     ///
     /// Available via the C wrapper.
+    #[inline]
     pub fn encoding(&self) -> &'static Encoding {
         self.encoding
     }
@@ -3769,12 +3793,14 @@ impl Encoder {
     }
 
     /// The `Encoding` this `Encoder` is for.
+    #[inline]
     pub fn encoding(&self) -> &'static Encoding {
         self.encoding
     }
 
     /// Returns `true` if this is an ISO-2022-JP encoder that's not in the
     /// ASCII state and `false` otherwise.
+    #[inline]
     pub fn has_pending_state(&self) -> bool {
         self.variant.has_pending_state()
     }
@@ -4111,6 +4137,16 @@ fn in_range16(i: u16, start: u16, end: u16) -> bool {
     i.wrapping_sub(start) < (end - start)
 }
 
+#[inline(always)]
+fn in_range32(i: u32, start: u32, end: u32) -> bool { // Half-open test: `start <= i && i < end`; requires `start <= end`.
+    i.wrapping_sub(start) < (end - start) // Values below `start` wrap to large numbers, so one unsigned compare covers both bounds.
+}
+
+#[inline(always)]
+fn in_inclusive_range8(i: u8, start: u8, end: u8) -> bool { // Closed test: `start <= i && i <= end`; requires `start <= end`.
+    i.wrapping_sub(start) <= (end - start) // Values below `start` wrap to large numbers, so one unsigned compare covers both bounds.
+}
+
 #[inline(always)]
 fn in_inclusive_range16(i: u16, start: u16, end: u16) -> bool {
     i.wrapping_sub(start) <= (end - start)
diff --git a/third_party/rust/encoding_rs/src/mem.rs b/third_party/rust/encoding_rs/src/mem.rs
new file mode 100644
index 000000000000..b26593855190
--- /dev/null
+++ b/third_party/rust/encoding_rs/src/mem.rs
@@ -0,0 +1,2873 @@
+// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Functions for converting between different in-RAM representations of text
+//! and for quickly checking if the Unicode Bidirectional Algorithm can be
+//! avoided.
+//!
+//! By using slices for output, the functions here seek to enable by-register
+//! (ALU register or SIMD register as available) operations in order to
+//! outperform iterator-based conversions available in the Rust standard
+//! library.
+//!
+//! _Note:_ "Latin1" in this module refers to the Unicode range from U+0000 to
+//! U+00FF, inclusive, and does not refer to the windows-1252 range. This
+//! in-memory encoding is sometimes used as a storage optimization of text
+//! when UTF-16 indexing and length semantics are exposed.
+
+use ascii::*;
+use super::in_inclusive_range8;
+use super::in_inclusive_range16;
+use super::in_range16;
+use super::in_inclusive_range32;
+use super::in_range32;
+use super::DecoderResult;
+use super::EncoderResult;
+use utf_8::*;
+
+cfg_if!{
+    if #[cfg(feature = "simd-accel")] {
+		use ::std::intrinsics::unlikely; // Unstable intrinsic; lib.rs enables `core_intrinsics` under `simd-accel`.
+	} else {
+		#[inline(always)]
+		// Declared `unsafe` only to match the intrinsic (itself needlessly unsafe), so call sites are the same in both configurations.
+		unsafe fn unlikely(b: bool) -> bool {
+			b // Stand-in without the intrinsic: returns the condition unchanged.
+		}
+	}
+}
+
+/// Classification of text as Latin1 (all code points are below U+0100),
+/// left-to-right with at least one non-Latin1 character, or as containing
+/// at least one right-to-left character.
+#[must_use]
+#[derive(Debug, PartialEq, Eq)]
+#[repr(C)] // NOTE(review): C layout plus explicit discriminants presumably serve the C API — confirm.
+pub enum Latin1Bidi {
+    /// Every character is below U+0100.
+    Latin1 = 0,
+    /// There is at least one character that's U+0100 or higher, but there
+    /// are no right-to-left characters.
+    LeftToRight = 1,
+    /// There is at least one right-to-left character.
+    Bidi = 2,
+}
+
+// High byte of every 16-bit lane of a word. Written as `u64`; `as` truncates, so this works on 32-bit, too.
+#[allow(dead_code)] // NOTE(review): presumably unused under some cfg combinations — confirm.
+const LATIN1_MASK: usize = 0xFF00FF00_FF00FF00u64 as usize;
+
+#[allow(unused_macros)] // NOTE(review): presumably unused when a SIMD configuration is selected — confirm.
+macro_rules! by_unit_check_alu { // Generates `fn $name(&[$unit]) -> bool` that scans the slice one aligned `usize` word at a time.
+    ($name:ident,
+     $unit:ty,
+     $bound:expr, // A unit passes when it is below this value.
+     $mask:ident) => ( // Word-sized constant: any bit set under it fails the check.
+    #[inline(always)]
+    fn $name(buffer: &[$unit]) -> bool {
+        let mut offset = 0usize; // Index of the next unit not yet examined.
+        let mut accu = 0usize; // Bitwise OR of everything examined outside the unrolled loop.
+        let unit_size = ::std::mem::size_of::<$unit>();
+        let len = buffer.len();
+        if len >= ALIGNMENT / unit_size { // The buffer holds at least one word's worth of units.
+            // The most common reason to return `false` is for the first code
+            // unit to fail the test, so check that first.
+            if buffer[0] >= $bound {
+                return false;
+            }
+            let src = buffer.as_ptr();
+            let mut until_alignment = ((ALIGNMENT - ((src as usize) & ALIGNMENT_MASK)) &
+                                       ALIGNMENT_MASK) / unit_size; // Units before `src` reaches `ALIGNMENT`; 0 if already aligned.
+            if until_alignment + ALIGNMENT / unit_size <= len { // Room for the unaligned prefix plus one whole word.
+                if until_alignment != 0 {
+                    accu |= buffer[offset] as usize; // OR the unaligned prefix unit by unit.
+                    offset += 1;
+                    until_alignment -= 1;
+                    while until_alignment != 0 {
+                        accu |= buffer[offset] as usize;
+                        offset += 1;
+                        until_alignment -= 1;
+                    }
+                    if accu >= $bound { // Testing the OR equals testing each unit only if $bound is a power of two — TODO confirm at call sites.
+                        return false;
+                    }
+                }
+                let len_minus_stride = len - ALIGNMENT / unit_size; // Last offset at which a whole word can still be read.
+                if offset + (4 * (ALIGNMENT / unit_size)) <= len { // At least four whole words remain.
+                    let len_minus_unroll = len - (4 * (ALIGNMENT / unit_size));
+                    loop {
+                        let unroll_accu = unsafe { *(src.offset(offset as isize) as *const usize) } |
+                                          unsafe { *(src.offset((offset + (ALIGNMENT / unit_size)) as isize) as *const usize) } |
+                                          unsafe { *(src.offset((offset + (2 * (ALIGNMENT / unit_size))) as isize) as *const usize) } |
+                                          unsafe { *(src.offset((offset + (3 * (ALIGNMENT / unit_size))) as isize) as *const usize) };
+                        if unroll_accu & $mask != 0 { // One test per four words; `&` binds tighter than `!=` in Rust.
+                            return false;
+                        }
+                        offset += 4 * (ALIGNMENT / unit_size);
+                        if offset > len_minus_unroll {
+                            break;
+                        }
+                    }
+                }
+                while offset <= len_minus_stride { // Remaining whole words; these are only tested at the very end.
+                    accu |= unsafe { *(src.offset(offset as isize) as *const usize) };
+                    offset += ALIGNMENT / unit_size;
+                }
+            }
+        }
+        for &unit in &buffer[offset..] { // Tail units, or the whole buffer when the word path was skipped.
+            accu |= unit as usize;
+        }
+        accu & $mask == 0 // Parsed as `(accu & $mask) == 0`.
+    })
+}
+
+#[allow(unused_macros)] // NOTE(review): presumably unused when no SIMD configuration is selected — confirm.
+macro_rules! by_unit_check_simd { // Generates `fn $name(&[$unit]) -> bool` that scans the slice one `$simd_ty` vector at a time.
+    ($name:ident,
+     $unit:ty,
+     $splat:expr, // Initial value of the vector accumulator.
+     $simd_ty:ty,
+     $bound:expr, // A unit passes when it is below this value.
+     $func:ident) => ( // Vector predicate: returns `true` when the OR-ed vector passes.
+    #[inline(always)]
+    fn $name(buffer: &[$unit]) -> bool {
+        let mut offset = 0usize; // Index of the next unit not yet examined.
+        let mut accu = 0usize; // Bitwise OR of the units examined one at a time (prefix and tail).
+        let unit_size = ::std::mem::size_of::<$unit>();
+        let len = buffer.len();
+        if len >= STRIDE_SIZE / unit_size { // The buffer holds at least one stride's worth of units.
+            // The most common reason to return `false` is for the first code
+            // unit to fail the test, so check that first.
+            if buffer[0] >= $bound {
+                return false;
+            }
+            let src = buffer.as_ptr();
+            let mut until_alignment = ((SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
+                                       SIMD_ALIGNMENT_MASK) / unit_size; // Units before `src` reaches `SIMD_ALIGNMENT`; 0 if already aligned.
+            if until_alignment + STRIDE_SIZE / unit_size <= len { // Room for the unaligned prefix plus one whole stride.
+                if until_alignment != 0 {
+                    accu |= buffer[offset] as usize; // OR the unaligned prefix unit by unit.
+                    offset += 1;
+                    until_alignment -= 1;
+                    while until_alignment != 0 {
+                        accu |= buffer[offset] as usize;
+                        offset += 1;
+                        until_alignment -= 1;
+                    }
+                    if accu >= $bound { // Testing the OR equals testing each unit only if $bound is a power of two — TODO confirm at call sites.
+                        return false;
+                    }
+                }
+                let len_minus_stride = len - STRIDE_SIZE / unit_size; // Last offset at which a whole stride can still be read.
+                if offset + (4 * (STRIDE_SIZE / unit_size)) <= len { // At least four whole strides remain.
+                    let len_minus_unroll = len - (4 * (STRIDE_SIZE / unit_size));
+                    loop {
+                        let unroll_accu = unsafe { *(src.offset(offset as isize) as *const $simd_ty) } |
+                                          unsafe { *(src.offset((offset + (STRIDE_SIZE / unit_size)) as isize) as *const $simd_ty) } |
+                                          unsafe { *(src.offset((offset + (2 * (STRIDE_SIZE / unit_size))) as isize) as *const $simd_ty) } |
+                                          unsafe { *(src.offset((offset + (3 * (STRIDE_SIZE / unit_size))) as isize) as *const $simd_ty) };
+                        if !$func(unroll_accu) { // One predicate call per four strides.
+                            return false;
+                        }
+                        offset += 4 * (STRIDE_SIZE / unit_size);
+                        if offset > len_minus_unroll {
+                            break;
+                        }
+                    }
+                }
+                let mut simd_accu = $splat;
+                while offset <= len_minus_stride { // Remaining whole strides, OR-ed together and tested once below.
+                    simd_accu = simd_accu | unsafe { *(src.offset(offset as isize) as *const $simd_ty) };
+                    offset += STRIDE_SIZE / unit_size;
+                }
+                if !$func(simd_accu) {
+                    return false;
+                }
+            }
+        }
+        for &unit in &buffer[offset..] { // Tail units, or the whole buffer when the vector path was skipped.
+            accu |= unit as usize;
+        }
+        accu < $bound // Covers only the scalar units; the vectors were already checked above.
+    })
+}
+
+cfg_if!{
+    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"))))] {
+        use simd_funcs::*;
+        use simd::u8x16;
+        use simd::u16x8;
+
+        const SIMD_ALIGNMENT: usize = 16; // byte alignment established before the vector loads in this file
+
+        const SIMD_ALIGNMENT_MASK: usize = 15; // SIMD_ALIGNMENT - 1
+
+        by_unit_check_simd!(is_ascii_impl, u8, u8x16::splat(0), u8x16, 0x80, simd_is_ascii);
+        by_unit_check_simd!(is_basic_latin_impl, u16, u16x8::splat(0), u16x8, 0x80, simd_is_basic_latin);
+        by_unit_check_simd!(is_utf16_latin1_impl, u16, u16x8::splat(0), u16x8, 0x100, simd_is_latin1);
+
+        #[inline(always)]
+        fn utf16_valid_up_to_impl(buffer: &[u16]) -> usize { // returns the length of the valid UTF-16 prefix of `buffer`
+            // This function is a mess, because it simultaneously tries to do
+            // only aligned SIMD (perhaps misguidedly) and needs to deal with
+            // the last code unit in a SIMD stride being part of a valid
+            // surrogate pair.
+            let unit_size = ::std::mem::size_of::<u16>();
+            let src = buffer.as_ptr();
+            let len = buffer.len();
+            let mut offset = 0usize;
+            'outer: loop { // each pass re-aligns `offset`, then scans whole strides
+                let until_alignment = ((SIMD_ALIGNMENT - ((unsafe { src.offset(offset as isize) } as usize) & SIMD_ALIGNMENT_MASK)) &
+                                        SIMD_ALIGNMENT_MASK) / unit_size;
+                if until_alignment == 0 {
+                    if offset + STRIDE_SIZE / unit_size > len { // no whole stride left: finish in the scalar tail
+                        break;
+                    }
+                } else {
+                    let offset_plus_until_alignment = offset + until_alignment;
+                    let offset_plus_until_alignment_plus_one = offset_plus_until_alignment + 1; // one extra unit so a pair straddling the alignment point is seen whole
+                    if offset_plus_until_alignment_plus_one + STRIDE_SIZE / unit_size > len {
+                        break;
+                    }
+                    let (up_to, last_valid_low) = utf16_valid_up_to_alu(&buffer[offset..offset_plus_until_alignment_plus_one]);
+                    if up_to < until_alignment { // invalid unit inside the unaligned prefix
+                        return offset + up_to;
+                    }
+                    if last_valid_low { // the extra unit completed a pair: resume after it and realign
+                        offset = offset_plus_until_alignment_plus_one;
+                        continue;
+                    }
+                    offset = offset_plus_until_alignment; // aligned from here on
+                }
+                let len_minus_stride = len - STRIDE_SIZE / unit_size;
+                'inner: loop {
+                    let offset_plus_stride = offset + STRIDE_SIZE / unit_size;
+                    if contains_surrogates(unsafe { *(src.offset(offset as isize) as *const u16x8) }) { // NOTE(review): helper defined elsewhere; presumably true if any lane is a surrogate
+                        if offset_plus_stride == len { // stride ends the buffer: let the scalar tail validate it
+                            break 'outer;
+                        }
+                        let offset_plus_stride_plus_one = offset_plus_stride + 1; // again include one unit past the stride
+                        let (up_to, last_valid_low) = utf16_valid_up_to_alu(&buffer[offset..offset_plus_stride_plus_one]);
+                        if up_to < STRIDE_SIZE / unit_size { // invalid unit inside this stride
+                            return offset + up_to;
+                        }
+                        if last_valid_low { // pair straddles the stride boundary: skip past it and realign
+                            offset = offset_plus_stride_plus_one;
+                            continue 'outer;
+                        }
+                    }
+                    offset = offset_plus_stride;
+                    if offset > len_minus_stride {
+                        break 'outer;
+                    }
+                }
+            }
+            let (up_to, _) = utf16_valid_up_to_alu(&buffer[offset..]); // scalar validation of whatever remains
+            offset + up_to
+        }
+    } else {
+        by_unit_check_alu!(is_ascii_impl, u8, 0x80, ASCII_MASK);
+        by_unit_check_alu!(is_basic_latin_impl, u16, 0x80, BASIC_LATIN_MASK);
+        by_unit_check_alu!(is_utf16_latin1_impl, u16, 0x100, LATIN1_MASK);
+
+        #[inline(always)]
+        fn utf16_valid_up_to_impl(buffer: &[u16]) -> usize { // scalar-only fallback
+            let (up_to, _) = utf16_valid_up_to_alu(buffer);
+            up_to
+        }
+    }
+}
+
+/// Returns `(up_to, last_valid_low)`: `up_to` is the index of the first unpaired surrogate (or the length if none);
+/// `last_valid_low` is true iff the slice ends with a low surrogate that completes a valid pair.
+#[inline(always)]
+fn utf16_valid_up_to_alu(buffer: &[u16]) -> (usize, bool) {
+    let len = buffer.len();
+    if len == 0 {
+        return (0, false);
+    }
+    let mut offset = 0usize;
+    loop {
+        let unit = buffer[offset];
+        let next = offset + 1;
+        let unit_minus_surrogate_start = unit.wrapping_sub(0xD800); // maps the surrogate range onto 0..=0x7FF
+        if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
+            // Not a surrogate
+            offset = next;
+            if offset == len {
+                return (offset, false);
+            }
+            continue;
+        }
+        if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
+            // high surrogate
+            if next < len {
+                let second = buffer[next];
+                let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00); // same wrapping trick for the low-surrogate range
+                if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
+                    // The next code unit is a low surrogate. Advance position.
+                    offset = next + 1;
+                    if offset == len {
+                        return (offset, true);
+                    }
+                    continue;
+                }
+                // The next code unit is not a low surrogate. Don't advance
+                // position and treat the high surrogate as unpaired.
+                // fall through
+            }
+            // Unpaired, fall through
+        }
+        // Unpaired surrogate
+        return (offset, false);
+    }
+}
+
+cfg_if!{
+    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"))))] {
+        #[inline(always)]
+        fn is_str_latin1_impl(buffer: &str) -> Option<usize> { // `None` if nothing above U+00FF is found, else `Some(offset)`
+            let mut offset = 0usize;
+            let bytes = buffer.as_bytes();
+            let len = bytes.len();
+            if len >= STRIDE_SIZE {
+                let src = bytes.as_ptr();
+                let mut until_alignment = (SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
+                                           SIMD_ALIGNMENT_MASK;
+                if until_alignment + STRIDE_SIZE <= len {
+                    while until_alignment != 0 { // scalar check of the unaligned prefix
+                        if bytes[offset] > 0xC3 { // in valid UTF-8 only lead bytes of code points above U+00FF exceed 0xC3
+                            return Some(offset);
+                        }
+                        offset += 1;
+                        until_alignment -= 1;
+                    }
+                    let len_minus_stride = len - STRIDE_SIZE;
+                    loop {
+                        if !simd_is_str_latin1(unsafe { *(src.offset(offset as isize) as *const u8x16) }) {
+                            // TODO: Ensure this compiles away when inlined into `is_str_latin1()`.
+                            while bytes[offset] & 0xC0 == 0x80 { // skip continuation bytes at the start of the stride
+                                offset += 1;
+                            }
+                            return Some(offset); // first non-continuation byte of the rejected stride, not necessarily the offending byte
+                        }
+                        offset += STRIDE_SIZE;
+                        if offset > len_minus_stride {
+                            break;
+                        }
+                    }
+                }
+            }
+            for i in offset..len { // scalar tail; covers the whole string when the SIMD path was not taken
+                if bytes[i] > 0xC3 {
+                    return Some(i);
+                }
+            }
+            None
+        }
+    } else {
+        #[inline(always)]
+        fn is_str_latin1_impl(buffer: &str) -> Option<usize> { // scalar fallback built on `validate_ascii`
+            let mut bytes = buffer.as_bytes();
+            let mut total = 0; // bytes consumed before the current `bytes` slice
+            loop {
+                if let Some((byte, offset)) = validate_ascii(bytes) { // NOTE(review): defined elsewhere; appears to yield the first non-ASCII byte and its offset
+                    total += offset;
+                    if byte > 0xC3 {
+                        return Some(total);
+                    }
+                    bytes = &bytes[offset + 2..]; // byte <= 0xC3 in a valid `str`: skip the two-byte sequence
+                    total += 2;
+                } else {
+                    return None;
+                }
+            }
+        }
+    }
+}
+
+#[inline(always)]
+fn is_utf8_latin1_impl(buffer: &[u8]) -> Option<usize> { // `None` if all of `buffer` passes, else `Some(offset of the first rejected byte)`
+    let mut bytes = buffer;
+    let mut total = 0; // bytes accepted before the current `bytes` slice
+    loop {
+        if let Some((byte, offset)) = validate_ascii(bytes) { // NOTE(review): defined elsewhere; appears to yield the first non-ASCII byte and its offset
+            total += offset;
+            if in_inclusive_range8(byte, 0xC2, 0xC3) { // the only lead bytes that encode U+0080..=U+00FF
+                let next = offset + 1;
+                if next == bytes.len() { // truncated two-byte sequence
+                    return Some(total);
+                }
+                if bytes[next] & 0xC0 != 0x80 { // second byte is not a continuation byte
+                    return Some(total);
+                }
+                bytes = &bytes[offset + 2..];
+                total += 2;
+            } else {
+                return Some(total);
+            }
+        } else {
+            return None;
+        }
+    }
+}
+
+cfg_if!{
+    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"))))] {
+        #[inline(always)]
+        fn is_utf16_bidi_impl(buffer: &[u16]) -> bool { // `true` as soon as a bidi helper flags a code unit or stride
+            let mut offset = 0usize;
+            let len = buffer.len();
+            if len >= STRIDE_SIZE / 2 { // `/ 2` converts bytes to `u16` units
+                let src = buffer.as_ptr();
+                let mut until_alignment = ((SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
+                                           SIMD_ALIGNMENT_MASK) / 2;
+                if until_alignment + (STRIDE_SIZE / 2) <= len {
+                    while until_alignment != 0 { // scalar check of the unaligned prefix
+                        if is_utf16_code_unit_bidi(buffer[offset]) {
+                            return true;
+                        }
+                        offset += 1;
+                        until_alignment -= 1;
+                    }
+                    let len_minus_stride = len - (STRIDE_SIZE / 2);
+                    loop { // aligned whole strides
+                        if is_u16x8_bidi(unsafe { *(src.offset(offset as isize) as *const u16x8) }) {
+                            return true;
+                        }
+                        offset += STRIDE_SIZE / 2;
+                        if offset > len_minus_stride {
+                            break;
+                        }
+                    }
+                }
+            }
+            for &u in &buffer[offset..] { // scalar tail; covers the whole buffer when the SIMD path was not taken
+                if is_utf16_code_unit_bidi(u) {
+                    return true;
+                }
+            }
+            false
+        }
+    } else {
+        #[inline(always)]
+        fn is_utf16_bidi_impl(buffer: &[u16]) -> bool { // scalar-only fallback
+            for &u in buffer {
+                if is_utf16_code_unit_bidi(u) {
+                    return true;
+                }
+            }
+            false
+        }
+    }
+}
+
+cfg_if!{
+    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"))))] {
+        #[inline(always)]
+        fn check_utf16_for_latin1_and_bidi_impl(buffer: &[u16]) -> Latin1Bidi { // `Latin1` if nothing above 0xFF is seen, else `Bidi`/`LeftToRight` per the bidi helpers
+            let mut offset = 0usize;
+            let len = buffer.len();
+            if len >= STRIDE_SIZE / 2 { // `/ 2` converts bytes to `u16` units
+                let src = buffer.as_ptr();
+                let mut until_alignment = ((SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
+                                           SIMD_ALIGNMENT_MASK) / 2;
+                if until_alignment + (STRIDE_SIZE / 2) <= len {
+                    while until_alignment != 0 { // scalar check of the unaligned prefix
+                        if buffer[offset] > 0xFF {
+                            // This transition isn't optimal, since the alignment is recomputed,
+                            // but not tweaking further today.
+                            if is_utf16_bidi_impl(&buffer[offset..]) {
+                                return Latin1Bidi::Bidi;
+                            }
+                            return Latin1Bidi::LeftToRight;
+                        }
+                        offset += 1;
+                        until_alignment -= 1;
+                    }
+                    let len_minus_stride = len - (STRIDE_SIZE / 2);
+                    loop {
+                        let mut s = unsafe { *(src.offset(offset as isize) as *const u16x8) };
+                        if !simd_is_latin1(s) { // Latin1 ruled out: from here on only look for bidi
+                            loop {
+                                if is_u16x8_bidi(s) {
+                                    return Latin1Bidi::Bidi;
+                                }
+                                offset += STRIDE_SIZE / 2;
+                                if offset > len_minus_stride { // no whole stride left: finish the bidi scan with scalar code
+                                    for &u in &buffer[offset..] {
+                                        if is_utf16_code_unit_bidi(u) {
+                                            return Latin1Bidi::Bidi;
+                                        }
+                                    }
+                                    return Latin1Bidi::LeftToRight;
+                                }
+                                s = unsafe { *(src.offset(offset as isize) as *const u16x8) };
+                            }
+                        }
+                        offset += STRIDE_SIZE / 2;
+                        if offset > len_minus_stride {
+                            break;
+                        }
+                    }
+                }
+            }
+            let mut iter = (&buffer[offset..]).iter(); // scalar tail; still in the "all Latin1 so far" state
+            loop {
+                if let Some(&u) = iter.next() {
+                    if u > 0xFF { // Latin1 ruled out: scan `u` and everything after it for bidi only
+                        let mut inner_u = u;
+                        loop {
+                            if is_utf16_code_unit_bidi(inner_u) {
+                                return Latin1Bidi::Bidi;
+                            }
+                            if let Some(&code_unit) = iter.next() {
+                                inner_u = code_unit;
+                            } else {
+                                return Latin1Bidi::LeftToRight;
+                            }
+                        }
+                    }
+                } else {
+                    return Latin1Bidi::Latin1; // reached the end without seeing anything above 0xFF
+                }
+            }
+        }
+    } else {
+        #[inline(always)]
+        fn check_utf16_for_latin1_and_bidi_impl(buffer: &[u16]) -> Latin1Bidi { // ALU fallback: same classification, one `usize` word at a time
+            let mut offset = 0usize;
+            let len = buffer.len();
+            if len >= ALIGNMENT / 2 { // `/ 2` converts bytes to `u16` units
+                let src = buffer.as_ptr();
+                let mut until_alignment = ((ALIGNMENT - ((src as usize) & ALIGNMENT_MASK)) &
+                                           ALIGNMENT_MASK) / 2;
+                if until_alignment + ALIGNMENT / 2 <= len {
+                    while until_alignment != 0 { // scalar check of the unaligned prefix
+                        if buffer[offset] > 0xFF {
+                            if is_utf16_bidi_impl(&buffer[offset..]) {
+                                return Latin1Bidi::Bidi;
+                            }
+                            return Latin1Bidi::LeftToRight;
+                        }
+                        offset += 1;
+                        until_alignment -= 1;
+                    }
+                    let len_minus_stride = len - ALIGNMENT / 2;
+                    loop {
+                        if unsafe { *(src.offset(offset as isize) as *const usize) } & LATIN1_MASK != 0 { // NOTE(review): LATIN1_MASK is defined elsewhere; presumably selects the high byte of each u16
+                            if is_utf16_bidi_impl(&buffer[offset..]) {
+                                return Latin1Bidi::Bidi;
+                            }
+                            return Latin1Bidi::LeftToRight;
+                        }
+                        offset += ALIGNMENT / 2;
+                        if offset > len_minus_stride {
+                            break;
+                        }
+                    }
+                }
+            }
+            let mut iter = (&buffer[offset..]).iter(); // scalar tail; still in the "all Latin1 so far" state
+            loop {
+                if let Some(&u) = iter.next() {
+                    if u > 0xFF { // Latin1 ruled out: scan `u` and everything after it for bidi only
+                        let mut inner_u = u;
+                        loop {
+                            if is_utf16_code_unit_bidi(inner_u) {
+                                return Latin1Bidi::Bidi;
+                            }
+                            if let Some(&code_unit) = iter.next() {
+                                inner_u = code_unit;
+                            } else {
+                                return Latin1Bidi::LeftToRight;
+                            }
+                        }
+                    }
+                } else {
+                    return Latin1Bidi::Latin1; // reached the end without seeing anything above 0xFF
+                }
+            }
+        }
+    }
+}
+
+/// Checks whether the buffer is all-ASCII.
+///
+/// May read the entire buffer even if it isn't all-ASCII. (I.e. the function
+/// is not guaranteed to fail fast.)
+#[inline]
+pub fn is_ascii(buffer: &[u8]) -> bool {
+    is_ascii_impl(buffer) // SIMD or ALU variant, selected at compile time by `cfg_if!`
+}
+
+/// Checks whether the buffer is all-Basic Latin (i.e. UTF-16 representing
+/// only ASCII characters).
+///
+/// May read the entire buffer even if it isn't all-ASCII. (I.e. the function
+/// is not guaranteed to fail fast.)
+#[inline]
+pub fn is_basic_latin(buffer: &[u16]) -> bool {
+    is_basic_latin_impl(buffer) // SIMD or ALU variant, selected at compile time by `cfg_if!`
+}
+
+/// Checks whether the buffer is valid UTF-8 representing only code points
+/// less than or equal to U+00FF.
+///
+/// Fails fast. (I.e. returns before having read the whole buffer if UTF-8
+/// invalidity or code points above U+00FF are discovered.)
+#[inline]
+pub fn is_utf8_latin1(buffer: &[u8]) -> bool {
+    is_utf8_latin1_impl(buffer).is_none() // `None` from the impl means no offending byte was found
+}
+
+/// Checks whether the buffer represents only code points less than or equal
+/// to U+00FF.
+///
+/// Fails fast. (I.e. returns before having read the whole buffer if code
+/// points above U+00FF are discovered.)
+#[inline]
+pub fn is_str_latin1(buffer: &str) -> bool {
+    is_str_latin1_impl(buffer).is_none() // `None` from the impl means no offending byte was found
+}
+
+/// Checks whether the buffer represents only code points less than or equal
+/// to U+00FF.
+///
+/// May read the entire buffer even if it isn't all-Latin1. (I.e. the function
+/// is not guaranteed to fail fast.)
+#[inline]
+pub fn is_utf16_latin1(buffer: &[u16]) -> bool {
+    is_utf16_latin1_impl(buffer) // SIMD or ALU variant, selected at compile time by `cfg_if!`
+}
+
+/// Checks whether a potentially-invalid UTF-8 buffer contains code points
+/// that trigger right-to-left processing.
+///
+/// The check is done on a Unicode block basis without regard to assigned
+/// vs. unassigned code points in the block. Additionally, the four
+/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
+/// Control characters that are technically bidi controls but do not cause
+/// right-to-left behavior without the presence of right-to-left characters
+/// or right-to-left controls are not checked for.
+///
+/// Returns `true` if the input is invalid UTF-8 or the input contains an
+/// RTL character. Returns `false` if the input is valid UTF-8 and contains
+/// no RTL characters.
+#[inline]
+pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
+    // As of rustc 1.25.0-nightly (73ac5d6a8 2018-01-11), this is faster
+    // than UTF-8 validation followed by `is_str_bidi()` for German,
+    // Russian and Japanese. However, this is considerably slower for Thai.
+    // Chances are that the compiler makes some branch predictions that are
+    // unfortunate for Thai. Not spending the time to manually optimize
+    // further at this time, since it's unclear if this variant even has
+    // use cases. However, this is worth revisiting once Rust gets the
+    // ability to annotate relative priorities of match arms.
+
+    // U+058F: D6 8F
+    // U+0590: D6 90
+    // U+08FF: E0 A3 BF
+    // U+0900: E0 A4 80
+    //
+    // U+200F: E2 80 8F
+    // U+202B: E2 80 AB
+    // U+202E: E2 80 AE
+    // U+2067: E2 81 A7
+    //
+    // U+FB4F: EF AD 8F
+    // U+FB50: EF AD 90
+    // U+FDFF: EF B7 BF
+    // U+FE00: EF B8 80
+    //
+    // U+FE6F: EF B9 AF
+    // U+FE70: EF B9 B0
+    // U+FEFF: EF BB BF
+    // U+FF00: EF BC 80
+    //
+    // U+107FF: F0 90 9F BF
+    // U+10800: F0 90 A0 80
+    // U+10FFF: F0 90 BF BF
+    // U+11000: F0 91 80 80
+    //
+    // U+1E7FF: F0 9E 9F BF
+    // U+1E800: F0 9E A0 80
+    // U+1EFFF: F0 9E BF BF
+    // U+1F000: F0 9F 80 80
+    let mut bytes = buffer;
+    'outer: loop {
+        if let Some((mut byte, mut read)) = validate_ascii(bytes) {
+            // Check for the longest sequence to avoid checking twice for the
+            // multi-byte sequences.
+            if read + 4 <= bytes.len() {
+                'inner: loop {
+                    // At this point, `byte` is not included in `read`.
+                    match byte {
+                        0...0x7F => {
+                            // ASCII: go back to SIMD.
+                            read += 1;
+                            bytes = &bytes[read..];
+                            continue 'outer;
+                        }
+                        0xC2...0xD5 => {
+                            // Two-byte
+                            let second = bytes[read + 1];
+                            if (UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) != 0 {
+                                return true;
+                            }
+                            read += 2;
+                        }
+                        0xD6 => {
+                            // Two-byte
+                            let second = bytes[read + 1];
+                            if (UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) != 0 {
+                                return true;
+                            }
+                            // XXX consider folding the above and below checks
+                            if second > 0x8F {
+                                return true;
+                            }
+                            read += 2;
+                        }
+                        // two-byte starting with 0xD7 and above is bidi
+                        0xE1 | 0xE3...0xEC | 0xEE => {
+                            // Three-byte normal
+                            let second = bytes[read + 1];
+                            let third = bytes[read + 2];
+                            if ((UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) |
+                                (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL)) !=
+                               0 {
+                                return true;
+                            }
+                            read += 3;
+                        }
+                        0xE2 => {
+                            // Three-byte normal, potentially bidi
+                            let second = bytes[read + 1];
+                            let third = bytes[read + 2];
+                            if ((UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) |
+                                (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL)) !=
+                               0 {
+                                return true;
+                            }
+                            if second == 0x80 {
+                                if third == 0x8F || third == 0xAB || third == 0xAE {
+                                    return true;
+                                }
+                            } else if second == 0x81 {
+                                if third == 0xA7 {
+                                    return true;
+                                }
+                            }
+                            read += 3;
+                        }
+                        0xEF => {
+                            // Three-byte normal, potentially bidi
+                            let second = bytes[read + 1];
+                            let third = bytes[read + 2];
+                            if ((UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) |
+                                (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL)) !=
+                               0 {
+                                return true;
+                            }
+                            if in_inclusive_range8(second, 0xAD, 0xB7) {
+                                if second == 0xAD {
+                                    if third > 0x8F {
+                                        return true;
+                                    }
+                                } else {
+                                    return true;
+                                }
+                            } else if in_inclusive_range8(second, 0xB9, 0xBB) {
+                                if second == 0xB9 {
+                                    if third > 0xAF {
+                                        return true;
+                                    }
+                                } else {
+                                    return true;
+                                }
+                            }
+                            read += 3;
+                        }
+                        0xE0 => {
+                            // Three-byte special lower bound, potentially bidi
+                            let second = bytes[read + 1];
+                            let third = bytes[read + 2];
+                            if ((UTF8_TRAIL_INVALID[second as usize] &
+                                 UTF8_THREE_BYTE_SPECIAL_LOWER_BOUND_TRAIL) |
+                                (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL)) !=
+                               0 {
+                                return true;
+                            }
+                            // XXX can this be folded into the above validity check
+                            if second < 0xA4 {
+                                return true;
+                            }
+                            read += 3;
+                        }
+                        0xED => {
+                            // Three-byte special upper bound
+                            let second = bytes[read + 1];
+                            let third = bytes[read + 2];
+                            if ((UTF8_TRAIL_INVALID[second as usize] &
+                                 UTF8_THREE_BYTE_SPECIAL_UPPER_BOUND_TRAIL) |
+                                (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL)) !=
+                               0 {
+                                return true;
+                            }
+                            read += 3;
+                        }
+                        0xF1...0xF3 => {
+                            // Four-byte normal
+                            let second = bytes[read + 1];
+                            let third = bytes[read + 2];
+                            let fourth = bytes[read + 3];
+                            if ((UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) |
+                                (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL) |
+                                (UTF8_TRAIL_INVALID[fourth as usize] & UTF8_NORMAL_TRAIL)) !=
+                               0 {
+                                return true;
+                            }
+                            read += 4;
+                        }
+                        0xF0 => {
+                            // Four-byte special lower bound, potentially bidi
+                            let second = bytes[read + 1];
+                            let third = bytes[read + 2];
+                            let fourth = bytes[read + 3];
+                            if ((UTF8_TRAIL_INVALID[second as usize] &
+                                 UTF8_FOUR_BYTE_SPECIAL_LOWER_BOUND_TRAIL) |
+                                (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL) |
+                                (UTF8_TRAIL_INVALID[fourth as usize] & UTF8_NORMAL_TRAIL)) !=
+                               0 {
+                                return true;
+                            }
+                            if unsafe { unlikely(second == 0x90 || second == 0x9E) } {
+                                let third = bytes[read + 2];
+                                if third >= 0xA0 {
+                                    return true;
+                                }
+                            }
+                            read += 4;
+                        }
+                        0xF4 => {
+                            // Four-byte special upper bound
+                            let second = bytes[read + 1];
+                            let third = bytes[read + 2];
+                            let fourth = bytes[read + 3];
+                            if ((UTF8_TRAIL_INVALID[second as usize] &
+                                 UTF8_FOUR_BYTE_SPECIAL_UPPER_BOUND_TRAIL) |
+                                (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL) |
+                                (UTF8_TRAIL_INVALID[fourth as usize] & UTF8_NORMAL_TRAIL)) !=
+                               0 {
+                                return true;
+                            }
+                            read += 4;
+                        }
+                        _ => {
+                            // Invalid lead or bidi-only lead
+                            return true;
+                        }
+                    }
+                    if read + 4 > bytes.len() {
+                        if read == bytes.len() {
+                            return false;
+                        }
+                        byte = bytes[read];
+                        break 'inner;
+                    }
+                    byte = bytes[read];
+                    continue 'inner;
+                }
+            }
+            // We can't have a complete 4-byte sequence, but we could still have
+            // a complete shorter sequence.
+
+            // At this point, `byte` is not included in `read`.
+            match byte {
+                0...0x7F => {
+                    // ASCII: go back to SIMD.
+                    read += 1;
+                    bytes = &bytes[read..];
+                    continue 'outer;
+                }
+                0xC2...0xD5 => {
+                    // Two-byte
+                    let new_read = read + 2;
+                    if new_read > bytes.len() {
+                        return true;
+                    }
+                    let second = bytes[read + 1];
+                    if (UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) != 0 {
+                        return true;
+                    }
+                    read = new_read;
+                    // We need to deal with the case where we came here with 3 bytes
+                    // left, so we need to take a look at the last one.
+                    bytes = &bytes[read..];
+                    continue 'outer;
+                }
+                0xD6 => {
+                    // Two-byte, potentially bidi
+                    let new_read = read + 2;
+                    if new_read > bytes.len() {
+                        return true;
+                    }
+                    let second = bytes[read + 1];
+                    if (UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) != 0 {
+                        return true;
+                    }
+                    // XXX consider folding the above and below checks
+                    if second > 0x8F {
+                        return true;
+                    }
+                    read = new_read;
+                    // We need to deal with the case where we came here with 3 bytes
+                    // left, so we need to take a look at the last one.
+                    bytes = &bytes[read..];
+                    continue 'outer;
+                }
+                // two-byte starting with 0xD7 and above is bidi
+                0xE1 | 0xE3...0xEC | 0xEE => {
+                    // Three-byte normal
+                    let new_read = read + 3;
+                    if new_read > bytes.len() {
+                        return true;
+                    }
+                    let second = bytes[read + 1];
+                    let third = bytes[read + 2];
+                    if ((UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) |
+                        (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL)) !=
+                       0 {
+                        return true;
+                    }
+                }
+                0xE2 => {
+                    // Three-byte normal, potentially bidi
+                    let new_read = read + 3;
+                    if new_read > bytes.len() {
+                        return true;
+                    }
+                    let second = bytes[read + 1];
+                    let third = bytes[read + 2];
+                    if ((UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) |
+                        (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL)) !=
+                       0 {
+                        return true;
+                    }
+                    if second == 0x80 {
+                        if third == 0x8F || third == 0xAB || third == 0xAE {
+                            return true;
+                        }
+                    } else if second == 0x81 {
+                        if third == 0xA7 {
+                            return true;
+                        }
+                    }
+                }
+                0xEF => {
+                    // Three-byte normal, potentially bidi
+                    let new_read = read + 3;
+                    if new_read > bytes.len() {
+                        return true;
+                    }
+                    let second = bytes[read + 1];
+                    let third = bytes[read + 2];
+                    if ((UTF8_TRAIL_INVALID[second as usize] & UTF8_NORMAL_TRAIL) |
+                        (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL)) !=
+                       0 {
+                        return true;
+                    }
+                    if in_inclusive_range8(second, 0xAD, 0xB7) {
+                        if second == 0xAD {
+                            if third > 0x8F {
+                                return true;
+                            }
+                        } else {
+                            return true;
+                        }
+                    } else if in_inclusive_range8(second, 0xB9, 0xBB) {
+                        if second == 0xB9 {
+                            if third > 0xAF {
+                                return true;
+                            }
+                        } else {
+                            return true;
+                        }
+                    }
+                }
+                0xE0 => {
+                    // Three-byte special lower bound, potentially bidi
+                    let new_read = read + 3;
+                    if new_read > bytes.len() {
+                        return true;
+                    }
+                    let second = bytes[read + 1];
+                    let third = bytes[read + 2];
+                    if ((UTF8_TRAIL_INVALID[second as usize] &
+                         UTF8_THREE_BYTE_SPECIAL_LOWER_BOUND_TRAIL) |
+                        (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL)) !=
+                       0 {
+                        return true;
+                    }
+                    // XXX can this be folded into the above validity check
+                    if second < 0xA4 {
+                        return true;
+                    }
+                }
+                0xED => {
+                    // Three-byte special upper bound
+                    let new_read = read + 3;
+                    if new_read > bytes.len() {
+                        return true;
+                    }
+                    let second = bytes[read + 1];
+                    let third = bytes[read + 2];
+                    if ((UTF8_TRAIL_INVALID[second as usize] &
+                         UTF8_THREE_BYTE_SPECIAL_UPPER_BOUND_TRAIL) |
+                        (UTF8_TRAIL_INVALID[third as usize] & UTF8_NORMAL_TRAIL)) !=
+                       0 {
+                        return true;
+                    }
+                }
+                _ => {
+                    // Invalid lead, 4-byte lead or 2-byte bidi-only lead
+                    return true;
+                }
+            }
+            return false;
+        } else {
+            return false;
+        }
+    }
+}
+
+/// Checks whether a valid UTF-8 buffer contains code points that trigger
+/// right-to-left processing. Returns `true` as soon as one is found and
+/// `false` if the end of the buffer is reached without finding any.
+///
+/// The check is done on a Unicode block basis without regard to assigned
+/// vs. unassigned code points in the block. Additionally, the four
+/// RIGHT-TO-LEFT controls in General Punctuation (RLM, RLE, RLO, RLI) are
+/// checked for. Bidi controls that do not cause right-to-left behavior
+/// without right-to-left characters or controls present are not checked for.
+#[inline]
+pub fn is_str_bidi(buffer: &str) -> bool {
+    // U+058F: D6 8F (last code point below the first RTL range)
+    // U+0590: D6 90 (first code point of the first RTL range)
+    // U+08FF: E0 A3 BF (last code point of the first RTL range)
+    // U+0900: E0 A4 80 (first code point above the first RTL range)
+    //
+    // U+200F: E2 80 8F (RLM)
+    // U+202B: E2 80 AB (RLE)
+    // U+202E: E2 80 AE (RLO)
+    // U+2067: E2 81 A7 (RLI)
+    //
+    // U+FB4F: EF AD 8F (last code point below Arabic Presentation Forms A)
+    // U+FB50: EF AD 90 (first code point of Arabic Presentation Forms A)
+    // U+FDFF: EF B7 BF (last code point of Arabic Presentation Forms A)
+    // U+FE00: EF B8 80 (first code point above Arabic Presentation Forms A)
+    //
+    // U+FE6F: EF B9 AF (last code point below Arabic Presentation Forms B)
+    // U+FE70: EF B9 B0 (first code point of Arabic Presentation Forms B)
+    // U+FEFF: EF BB BF (last code point of Arabic Presentation Forms B)
+    // U+FF00: EF BC 80 (first code point above Arabic Presentation Forms B)
+    //
+    // U+107FF: F0 90 9F BF (last code point below the first astral RTL range)
+    // U+10800: F0 90 A0 80 (first code point of the first astral RTL range)
+    // U+10FFF: F0 90 BF BF (last code point of the first astral RTL range)
+    // U+11000: F0 91 80 80 (first code point above the first astral RTL range)
+    //
+    // U+1E7FF: F0 9E 9F BF (last code point below the second astral RTL range)
+    // U+1E800: F0 9E A0 80 (first code point of the second astral RTL range)
+    // U+1EFFF: F0 9E BF BF (last code point of the second astral RTL range)
+    // U+1F000: F0 9F 80 80 (first code point above the second astral RTL range)
+    let mut bytes = buffer.as_bytes();
+    'outer: loop {
+        // TODO: Instead of just validating ASCII using SIMD, use SIMD
+        // to check for non-ASCII lead bytes, too, to quickly conclude
+        // that the vector consists entirely of CJK and below-Hebrew
+        // code points.
+        // Unfortunately, scripts above Arabic but below CJK share
+        // lead bytes with RTL.
+        if let Some((mut byte, mut read)) = validate_ascii(bytes) {
+            'inner: loop {
+                // At this point, `byte` is not included in `read`.
+                if byte < 0xE0 {
+                    if byte >= 0x80 {
+                        // Two-byte
+                        // Adding `unlikely` here improved throughput on
+                        // Russian plain text by 33%!
+                        if unsafe { unlikely(byte >= 0xD6) } {
+                            if byte == 0xD6 {
+                                let second = bytes[read + 1];
+                                if second > 0x8F { // U+0590 or above
+                                    return true;
+                                }
+                            } else {
+                                return true; // Leads 0xD7 and above encode only RTL-range code points.
+                            }
+                        }
+                        read += 2;
+                    } else {
+                        // ASCII: go back to SIMD.
+                        read += 1;
+                        // Intuitively, we should go back to the outer loop only
+                        // if byte is 0x30 or above, so as to avoid thrashing on
+                        // ASCII space, comma and period in non-Latin context.
+                        // However, the extra branch seems to cost more than it's
+                        // worth.
+                        bytes = &bytes[read..];
+                        continue 'outer;
+                    }
+                } else if byte < 0xF0 {
+                    // Three-byte; only leads 0xE0, 0xE2 and 0xEF can start an RTL match.
+                    if unsafe { unlikely(!in_inclusive_range8(byte, 0xE3, 0xEE) && byte != 0xE1) } {
+                        let second = bytes[read + 1];
+                        if byte == 0xE0 {
+                            if second < 0xA4 { // Below U+0900
+                                return true;
+                            }
+                        } else if byte == 0xE2 {
+                            let third = bytes[read + 2];
+                            if second == 0x80 {
+                                if third == 0x8F || third == 0xAB || third == 0xAE {
+                                    return true;
+                                }
+                            } else if second == 0x81 {
+                                if third == 0xA7 {
+                                    return true;
+                                }
+                            }
+                        } else {
+                            debug_assert_eq!(byte, 0xEF);
+                            if in_inclusive_range8(second, 0xAD, 0xB7) {
+                                if second == 0xAD {
+                                    let third = bytes[read + 2];
+                                    if third > 0x8F { // U+FB50 or above
+                                        return true;
+                                    }
+                                } else {
+                                    return true;
+                                }
+                            } else if in_inclusive_range8(second, 0xB9, 0xBB) {
+                                if second == 0xB9 {
+                                    let third = bytes[read + 2];
+                                    if third > 0xAF { // U+FE70 or above
+                                        return true;
+                                    }
+                                } else {
+                                    return true;
+                                }
+                            }
+                        }
+                    }
+                    read += 3;
+                } else {
+                    // Four-byte
+                    let second = bytes[read + 1];
+                    if unsafe { unlikely(byte == 0xF0 && (second == 0x90 || second == 0x9E)) } {
+                        let third = bytes[read + 2];
+                        if third >= 0xA0 { // U+10800..=U+10FFF or U+1E800..=U+1EFFF
+                            return true;
+                        }
+                    }
+                    read += 4;
+                }
+                // The comparison is always < or == and never >, but including
+                // > here to let the compiler assume that < is true if this
+                // comparison is false.
+                if read >= bytes.len() {
+                    return false;
+                }
+                byte = bytes[read];
+                continue 'inner;
+            }
+        } else {
+            return false;
+        }
+    }
+}
+
+/// Checks whether a UTF-16 buffer contains code points that trigger
+/// right-to-left processing.
+///
+/// The check is done on a Unicode block basis without regard to assigned
+/// vs. unassigned code points in the block. Additionally, the four
+/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
+/// Control characters that are technically bidi controls but do not cause
+/// right-to-left behavior without the presence of right-to-left characters
+/// or right-to-left controls are not checked for.
+///
+/// Returns `true` if the input contains an RTL character or an unpaired
+/// high surrogate that could be the high half of an RTL character.
+/// Returns `false` if the input contains neither RTL characters nor
+/// unpaired high surrogates that could be the high halves of RTL characters.
+#[inline]
+pub fn is_utf16_bidi(buffer: &[u16]) -> bool {
+    is_utf16_bidi_impl(buffer)
+}
+
+/// Checks whether a code point triggers right-to-left processing.
+///
+/// The check is done on a Unicode block basis without regard to assigned
+/// vs. unassigned code points in the block. Additionally, the four
+/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
+/// Control characters that are technically bidi controls but do not cause
+/// right-to-left behavior without the presence of right-to-left characters
+/// or right-to-left controls are not checked for.
+#[inline(always)]
+pub fn is_char_bidi(c: char) -> bool {
+    // Controls:
+    // Every control with RIGHT-TO-LEFT in its name in
+    // https://www.unicode.org/charts/PDF/U2000.pdf
+    // U+200F RLM
+    // U+202B RLE
+    // U+202E RLO
+    // U+2067 RLI
+    //
+    // BMP RTL:
+    // https://www.unicode.org/roadmaps/bmp/
+    // U+0590...U+08FF
+    // U+FB50...U+FDFF Arabic Presentation Forms A
+    // U+FE70...U+FEFF Arabic Presentation Forms B
+    //
+    // Supplementary RTL:
+    // https://www.unicode.org/roadmaps/smp/
+    // U+10800...U+10FFF (Lead surrogate U+D802 or U+D803)
+    // U+1E800...U+1EFFF (Lead surrogate U+D83A or U+D83B)
+    let code_point = c as u32;
+    if code_point < 0x0590 {
+        // Below Hebrew
+        return false;
+    }
+    if in_range32(code_point, 0x0900, 0xFB50) { // NOTE(review): upper bound presumably exclusive
+        // Above Arabic Extended-A and below Arabic Presentation Forms
+        if in_inclusive_range32(code_point, 0x200F, 0x2067) {
+            // In the range that contains the RTL controls
+            return code_point == 0x200F || code_point == 0x202B || code_point == 0x202E ||
+                   code_point == 0x2067;
+        }
+        return false;
+    }
+    if code_point > 0x1EFFF {
+        // Above second astral RTL. (Emoji is here.)
+        return false;
+    }
+    if in_range32(code_point, 0x11000, 0x1E800) {
+        // Between astral RTL blocks
+        return false;
+    }
+    if in_range32(code_point, 0xFF00, 0x10800) {
+        // Above Arabic Presentation Forms B and below first
+        // astral RTL
+        return false;
+    }
+    if in_range32(code_point, 0xFE00, 0xFE70) {
+        // Between Arabic Presentation Forms A and B
+        return false;
+    }
+    true // Every non-RTL range was ruled out above.
+}
+
+/// Checks whether a UTF-16 code unit triggers right-to-left processing.
+///
+/// The check is done on a Unicode block basis without regard to assigned
+/// vs. unassigned code points in the block. Additionally, the four
+/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
+/// Control characters that are technically bidi controls but do not cause
+/// right-to-left behavior without the presence of right-to-left characters
+/// or right-to-left controls are not checked for.
+///
+/// Since supplementary-plane right-to-left blocks are identifiable from the
+/// high surrogate without examining the low surrogate, this function returns
+/// `true` for such high surrogates making the function suitable for handling
+/// supplementary-plane text without decoding surrogate pairs to scalar
+/// values. Obviously, such high surrogates are then reported as right-to-left
+/// even if actually unpaired.
+#[inline(always)]
+pub fn is_utf16_code_unit_bidi(u: u16) -> bool {
+    if u < 0x0590 {
+        // Below Hebrew
+        return false;
+    }
+    if in_range16(u, 0x0900, 0xD802) { // NOTE(review): upper bound presumably exclusive
+        // Above Arabic Extended-A and below first RTL surrogate
+        if in_inclusive_range16(u, 0x200F, 0x2067) {
+            // In the range that contains the RTL controls
+            return u == 0x200F || u == 0x202B || u == 0x202E || u == 0x2067;
+        }
+        return false;
+    }
+    if in_range16(u, 0xD83C, 0xFB50) {
+        // Between astral RTL high surrogates and Arabic Presentation Forms
+        // (Emoji is here)
+        return false;
+    }
+    if in_range16(u, 0xD804, 0xD83A) {
+        // Between RTL high surrogates
+        return false;
+    }
+    if u > 0xFEFF {
+        // Above Arabic Presentation Forms
+        return false;
+    }
+    if in_range16(u, 0xFE00, 0xFE70) {
+        // Between Arabic Presentation Forms A and B
+        return false;
+    }
+    true // Every non-RTL range was ruled out above.
+}
+
+/// Checks whether a potentially invalid UTF-8 buffer contains code points
+/// that trigger right-to-left processing or is all-Latin1.
+///
+/// Possibly more efficient than performing the checks separately.
+///
+/// Returns `Latin1Bidi::Latin1` if `is_utf8_latin1()` would return `true`.
+/// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf8_bidi()` would return
+/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
+#[inline]
+pub fn check_utf8_for_latin1_and_bidi(buffer: &[u8]) -> Latin1Bidi {
+    if let Some(offset) = is_utf8_latin1_impl(buffer) { // NOTE(review): presumably first non-Latin1 offset
+        if is_utf8_bidi(&buffer[offset..]) { // Only the tail from `offset` is scanned for RTL.
+            Latin1Bidi::Bidi
+        } else {
+            Latin1Bidi::LeftToRight
+        }
+    } else {
+        Latin1Bidi::Latin1 // `None` from the Latin1 check is reported as all-Latin1.
+    }
+}
+
+/// Checks whether a valid UTF-8 buffer contains code points
+/// that trigger right-to-left processing or is all-Latin1.
+///
+/// Possibly more efficient than performing the checks separately.
+///
+/// Returns `Latin1Bidi::Latin1` if `is_str_latin1()` would return `true`.
+/// Otherwise, returns `Latin1Bidi::Bidi` if `is_str_bidi()` would return
+/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
+#[inline]
+pub fn check_str_for_latin1_and_bidi(buffer: &str) -> Latin1Bidi {
+    // The transition from the Latin1 check to the bidi check isn't
+    // optimal, but it is not being tweaked to perfection today.
+    if let Some(offset) = is_str_latin1_impl(buffer) { // NOTE(review): presumably first non-Latin1 offset
+        if is_str_bidi(&buffer[offset..]) { // Only the tail from `offset` is scanned for RTL.
+            Latin1Bidi::Bidi
+        } else {
+            Latin1Bidi::LeftToRight
+        }
+    } else {
+        Latin1Bidi::Latin1 // `None` from the Latin1 check is reported as all-Latin1.
+    }
+}
+
+/// Checks whether a potentially invalid UTF-16 buffer contains code points
+/// that trigger right-to-left processing or is all-Latin1.
+///
+/// Possibly more efficient than performing the checks separately.
+///
+/// Returns `Latin1Bidi::Latin1` if `is_utf16_latin1()` would return `true`.
+/// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf16_bidi()` would return
+/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
+#[inline]
+pub fn check_utf16_for_latin1_and_bidi(buffer: &[u16]) -> Latin1Bidi {
+    check_utf16_for_latin1_and_bidi_impl(buffer) // Public wrapper; all work happens in the `_impl` function.
+}
+
+/// Converts potentially-invalid UTF-8 to valid UTF-16 with errors replaced
+/// with the REPLACEMENT CHARACTER.
+///
+/// The length of the destination buffer must be at least the length of the
+/// source buffer _plus one_.
+///
+/// Returns the number of `u16`s written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+#[inline]
+pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize {
+    // TODO: Can the + 1 be eliminated?
+    assert!(dst.len() >= src.len() + 1);
+    let mut decoder = Utf8Decoder::new_inner();
+    let mut total_read = 0usize;
+    let mut total_written = 0usize;
+    loop { // Each pass decodes until the input ends or a malformed sequence is hit.
+        let (result, read, written) =
+            decoder.decode_to_utf16_raw(&src[total_read..], &mut dst[total_written..], true); // NOTE(review): `true` presumably means "last chunk"
+        total_read += read;
+        total_written += written;
+        match result {
+            DecoderResult::InputEmpty => {
+                return total_written;
+            }
+            DecoderResult::OutputFull => {
+                unreachable!("The assert at the top of the function should have caught this.");
+            }
+            DecoderResult::Malformed(_, _) => {
+                // There should always be space for the U+FFFD, because
+                // otherwise we'd have gotten OutputFull already.
+                dst[total_written] = 0xFFFD;
+                total_written += 1;
+            }
+        }
+    }
+}
+
+/// Converts valid UTF-8 to valid UTF-16.
+///
+/// The length of the destination buffer must be at least the length of the
+/// source buffer.
+///
+/// Returns the number of `u16`s written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+#[inline]
+pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
+    assert!(
+        dst.len() >= src.len(),
+        "Destination must not be shorter than the source."
+    );
+    let bytes = src.as_bytes();
+    let mut read = 0;
+    let mut written = 0;
+    'outer: loop {
+        let mut byte = {
+            let src_remaining = &bytes[read..];
+            let dst_remaining = &mut dst[written..];
+            let length = src_remaining.len();
+            match unsafe {
+                      ascii_to_basic_latin(
+                    src_remaining.as_ptr(),
+                    dst_remaining.as_mut_ptr(),
+                    length,
+                )
+                  } {
+                None => {
+                    written += length; // `None`: the whole remainder is counted as converted.
+                    return written;
+                }
+                Some((non_ascii, consumed)) => {
+                    read += consumed;
+                    written += consumed;
+                    non_ascii
+                }
+            }
+        };
+        'inner: loop {
+            // At this point, `byte` is not included in `read`.
+            if byte < 0xE0 {
+                if byte >= 0x80 {
+                    // Two-byte
+                    let second = bytes[read + 1];
+                    let point = (((byte as u32) & 0x1Fu32) << 6) | (second as u32 & 0x3Fu32);
+                    dst[written] = point as u16;
+                    read += 2;
+                    written += 1;
+                } else {
+                    // ASCII: write and go back to SIMD.
+                    dst[written] = byte as u16;
+                    read += 1;
+                    written += 1;
+                    // Intuitively, we should go back to the outer loop only
+                    // if byte is 0x30 or above, so as to avoid thrashing on
+                    // ASCII space, comma and period in non-Latin context.
+                    // However, the extra branch seems to cost more than it's
+                    // worth.
+                    continue 'outer;
+                }
+            } else if byte < 0xF0 {
+                // Three-byte
+                let second = bytes[read + 1];
+                let third = bytes[read + 2];
+                let point = (((byte as u32) & 0xFu32) << 12) | ((second as u32 & 0x3Fu32) << 6) |
+                            (third as u32 & 0x3Fu32);
+                dst[written] = point as u16;
+                read += 3;
+                written += 1;
+            } else {
+                // Four-byte: the code point is written as a surrogate pair.
+                let second = bytes[read + 1];
+                let third = bytes[read + 2];
+                let fourth = bytes[read + 3];
+                let point = (((byte as u32) & 0x7u32) << 18) | ((second as u32 & 0x3Fu32) << 12) |
+                            ((third as u32 & 0x3Fu32) << 6) |
+                            (fourth as u32 & 0x3Fu32);
+                dst[written] = (0xD7C0 + (point >> 10)) as u16; // High surrogate; 0xD7C0 == 0xD800 - 0x40
+                dst[written + 1] = (0xDC00 + (point & 0x3FF)) as u16; // Low surrogate
+                read += 4;
+                written += 2;
+            }
+            // The comparison is always < or == and never >, but including
+            // > here to let the compiler assume that < is true if this
+            // comparison is false.
+            if read >= src.len() {
+                return written;
+            }
+            byte = bytes[read];
+            continue 'inner;
+        }
+    }
+}
+
+/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
+/// with the REPLACEMENT CHARACTER.
+///
+/// The length of the destination buffer must be at least the length of the
+/// source buffer times three _plus one_.
+///
+/// Returns the number of bytes written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Safety
+///
+/// Note that this function may write garbage beyond the number of bytes
+/// indicated by the return value, so using a `&mut str` interpreted as
+/// `&mut [u8]` as the destination is not safe. If you want to convert into
+/// a `&mut str`, use `convert_utf16_to_str()` instead of this function.
+#[inline]
+pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize {
+    assert!(dst.len() >= src.len() * 3 + 1);
+    let mut encoder = Utf8Encoder;
+    let (result, _, written) = encoder.encode_from_utf16_raw(src, dst, true); // Read count is ignored.
+    debug_assert!(result == EncoderResult::InputEmpty); // Checked in debug builds only.
+    written
+}
+
+/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
+/// with the REPLACEMENT CHARACTER such that the validity of the output is
+/// signaled using the Rust type system.
+///
+/// The length of the destination buffer must be at least the length of the
+/// source buffer times three _plus one_.
+///
+/// Returns the number of bytes written (bytes of `dst` past that may be zeroed).
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+#[inline]
+pub fn convert_utf16_to_str(src: &[u16], dst: &mut str) -> usize {
+    let bytes: &mut [u8] = unsafe { ::std::mem::transmute(dst) };
+    let written = convert_utf16_to_utf8(src, bytes);
+    let len = bytes.len();
+    let mut trail = written; // First byte past the converted output.
+    let max = ::std::cmp::min(len, trail + STRIDE_SIZE); // Zero at most STRIDE_SIZE bytes.
+    while trail < max {
+        bytes[trail] = 0; // The conversion may have left garbage here; NUL keeps `dst` valid.
+        trail += 1;
+    }
+    while trail < len && ((bytes[trail] & 0xC0) == 0x80) {
+        bytes[trail] = 0; // A continuation byte here would be invalid without its lead byte.
+        trail += 1;
+    }
+    written
+}
+
+/// Converts bytes whose unsigned value is interpreted as Unicode code point
+/// (i.e. U+0000 to U+00FF, inclusive) to UTF-16.
+///
+/// The length of the destination buffer must be at least the length of the
+/// source buffer.
+///
+/// The number of `u16`s written equals the length of the source buffer.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+#[inline]
+pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) {
+    assert!(
+        dst.len() >= src.len(),
+        "Destination must not be shorter than the source."
+    );
+    // TODO: On aarch64, the safe version autovectorizes to the same unpacking
+    // instructions as this code, but, yet, the autovectorized version is
+    // faster.
+    unsafe {
+        unpack_latin1(src.as_ptr(), dst.as_mut_ptr(), src.len()); // Length was checked by the assert above.
+    }
+}
+
+/// Converts bytes whose unsigned value is interpreted as Unicode code point
+/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8.
+///
+/// The length of the destination buffer must be at least the length of the
+/// source buffer times two.
+///
+/// Returns the number of bytes written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+///
+/// # Safety
+///
+/// Note that this function may write garbage beyond the number of bytes
+/// indicated by the return value, so using a `&mut str` interpreted as
+/// `&mut [u8]` as the destination is not safe. If you want to convert into
+/// a `&mut str`, use `convert_latin1_to_str()` instead of this function.
+#[inline]
+pub fn convert_latin1_to_utf8(src: &[u8], dst: &mut [u8]) -> usize {
+    assert!(
+        dst.len() >= src.len() * 2,
+        "Destination must not be shorter than the source times two."
+    );
+    let src_len = src.len();
+    let src_ptr = src.as_ptr();
+    let dst_ptr = dst.as_mut_ptr();
+    let mut total_read = 0usize;
+    let mut total_written = 0usize;
+    loop {
+        // src can't advance more than dst
+        let src_left = src_len - total_read;
+        if let Some((non_ascii, consumed)) =
+            unsafe {
+                ascii_to_ascii(
+                    src_ptr.offset(total_read as isize),
+                    dst_ptr.offset(total_written as isize),
+                    src_left,
+                )
+            } {
+            total_read += consumed + 1; // `+ 1` accounts for the non-ASCII byte handled below.
+            total_written += consumed;
+
+            let code_point = non_ascii as u32;
+            dst[total_written] = ((code_point >> 6) | 0xC0u32) as u8; // Two-byte lead
+            total_written += 1;
+            dst[total_written] = ((code_point as u32 & 0x3Fu32) | 0x80u32) as u8; // Trail byte
+            total_written += 1;
+            continue;
+        }
+        return total_written + src_left; // No non-ASCII byte left: the rest counts as written.
+    }
+}
+
+/// Converts bytes whose unsigned value is interpreted as Unicode code point
+/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8 such that the validity of the
+/// output is signaled using the Rust type system.
+///
+/// The length of the destination buffer must be at least the length of the
+/// source buffer times two.
+///
+/// Returns the number of bytes written (bytes of `dst` past that may be zeroed).
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+#[inline]
+pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize {
+    let bytes: &mut [u8] = unsafe { ::std::mem::transmute(dst) };
+    let written = convert_latin1_to_utf8(src, bytes);
+    let len = bytes.len();
+    let mut trail = written; // First byte past the converted output.
+    let max = ::std::cmp::min(len, trail + STRIDE_SIZE); // Zero at most STRIDE_SIZE bytes.
+    while trail < max {
+        bytes[trail] = 0; // The conversion may have left garbage here; NUL keeps `dst` valid.
+        trail += 1;
+    }
+    while trail < len && ((bytes[trail] & 0xC0) == 0x80) {
+        bytes[trail] = 0; // A continuation byte here would be invalid without its lead byte.
+        trail += 1;
+    }
+    written
+}
+
+/// If the input is valid UTF-8 representing only Unicode code points from
+/// U+0000 to U+00FF, inclusive, converts the input into output that
+/// represents the value of each code point as the unsigned byte value of
+/// each output byte.
+///
+/// If the input does not fulfill the condition stated above, this function
+/// does something that is memory-safe without any promises about any
+/// properties of the output. In particular, callers shouldn't assume the
+/// output to be the same across crate versions or CPU architectures and
+/// should not assume that non-ASCII input can't map to ASCII output.
+///
+/// The length of the destination buffer must be at least the length of the
+/// source buffer.
+///
+/// Returns the number of bytes written.
+///
+/// # Panics
+///
+/// Panics if the destination buffer is shorter than stated above.
+#[inline]
+pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
+    assert!(
+        dst.len() >= src.len(),
+        "Destination must not be shorter than the source."
+    );
+    let src_len = src.len();
+    let src_ptr = src.as_ptr();
+    let dst_ptr = dst.as_mut_ptr();
+    let mut total_read = 0usize;
+    let mut total_written = 0usize;
+    loop {
+        // dst can't advance more than src
+        let src_left = src_len - total_read;
+        if let Some((non_ascii, consumed)) =
+            unsafe {
+                ascii_to_ascii(
+                    src_ptr.offset(total_read as isize),
+                    dst_ptr.offset(total_written as isize),
+                    src_left,
+                )
+            } {
+            total_read += consumed + 1; // `+ 1` consumes the non-ASCII byte (treated as a lead).
+            total_written += consumed;
+
+            if total_read == src_len { // The lead was the last byte: no trail to combine with.
+                return total_written;
+            }
+
+            let trail = src[total_read]; // Taken as the trail byte without validation.
+            total_read += 1;
+
+            dst[total_written] = (((non_ascii as u32 & 0x1Fu32) << 6) |
+                                  (trail as u32 & 0x3Fu32)) as u8;
+            total_written += 1;
+            continue;
+        }
+        return total_written + src_left; // No non-ASCII byte left: the rest counts as written.
+    }
+}
+
+/// Narrows UTF-16 to Latin1: when `src` is valid UTF-16 made up exclusively
+/// of code points in the range U+0000 to U+00FF, inclusive, each code point
+/// is written to `dst` as a single byte whose unsigned value equals the code
+/// point.
+///
+/// For any other input the operation remains memory-safe, but nothing is
+/// promised about the output. In particular, callers must not rely on the
+/// output being stable across crate versions or CPU architectures, nor on
+/// non-Basic Latin input never turning into ASCII output.
+///
+/// The destination buffer must be at least as long as the source buffer,
+/// measured in elements.
+///
+/// Nothing is returned: the number of bytes written always equals the
+/// length of the source buffer.
+///
+/// # Panics
+///
+/// Panics if `dst` is shorter than `src`.
+#[inline]
+pub fn convert_utf16_to_latin1_lossy(src: &[u16], dst: &mut [u8]) {
+    let units = src.len();
+    assert!(
+        dst.len() >= units,
+        "Destination must not be shorter than the source."
+    );
+    // One output byte is produced per input code unit.
+    unsafe { pack_latin1(src.as_ptr(), dst.as_mut_ptr(), units) };
+}
+
+/// Returns the index of the first unpaired surrogate in `buffer` (as computed by
+/// `utf16_valid_up_to_impl`) or, if all of it is valid UTF-16, `buffer.len()`.
+#[inline]
+pub fn utf16_valid_up_to(buffer: &[u16]) -> usize {
+    utf16_valid_up_to_impl(buffer)
+}
+
+/// Overwrites every unpaired surrogate in `buffer` with U+FFFD, the
+/// REPLACEMENT CHARACTER, leaving all other code units untouched.
+#[inline]
+pub fn ensure_utf16_validity(buffer: &mut [u16]) {
+    let len = buffer.len();
+    let mut pos = utf16_valid_up_to(&buffer[..]);
+    while pos != len {
+        // `pos` is the index of an unpaired surrogate: patch it, resume after it.
+        buffer[pos] = 0xFFFD;
+        pos += 1;
+        pos += utf16_valid_up_to(&buffer[pos..]);
+    }
+}
+
+/// Copies the leading ASCII run of `src` into `dst`, stopping at the first
+/// non-ASCII byte or, when `src` is ASCII throughout, at the end of `src`.
+///
+/// `dst` must be at least as long as `src`, regardless of how much of the
+/// source turns out to be ASCII.
+///
+/// Returns the number of bytes written.
+///
+/// # Panics
+///
+/// Panics if `dst` is shorter than `src`.
+#[inline]
+pub fn copy_ascii_to_ascii(src: &[u8], dst: &mut [u8]) -> usize {
+    let len = src.len();
+    assert!(
+        dst.len() >= len,
+        "Destination must not be shorter than the source."
+    );
+    match unsafe { ascii_to_ascii(src.as_ptr(), dst.as_mut_ptr(), len) } {
+        // A non-ASCII byte stopped the copy after `copied` bytes.
+        Some((_, copied)) => copied,
+        None => len,
+    }
+}
+
+/// Widens the leading ASCII run of `src` into `dst`, zero-extending each byte
+/// to a UTF-16 code unit. Copying stops at the first non-ASCII byte or, when
+/// `src` is ASCII throughout, at the end of `src`.
+///
+/// `dst` must hold at least as many `u16`s as `src` holds bytes, regardless of
+/// how much of the source turns out to be ASCII.
+///
+/// Returns the number of `u16`s written.
+///
+/// # Panics
+///
+/// Panics if `dst` is shorter than `src`.
+#[inline]
+pub fn copy_ascii_to_basic_latin(src: &[u8], dst: &mut [u16]) -> usize {
+    let len = src.len();
+    assert!(
+        dst.len() >= len,
+        "Destination must not be shorter than the source."
+    );
+    let stopped = unsafe { ascii_to_basic_latin(src.as_ptr(), dst.as_mut_ptr(), len) };
+    match stopped {
+        Some((_, widened)) => widened,
+        None => len,
+    }
+}
+
+/// Narrows the leading Basic Latin run of `src` into `dst`, storing each code
+/// unit as one ASCII byte. Copying stops at the first code unit outside Basic
+/// Latin or, when `src` is Basic Latin throughout, at the end of `src`.
+///
+/// `dst` must hold at least as many bytes as `src` holds `u16`s, regardless
+/// of how much of the source turns out to be Basic Latin.
+///
+/// Returns the number of bytes written.
+///
+/// # Panics
+///
+/// Panics if `dst` is shorter than `src`.
+#[inline]
+pub fn copy_basic_latin_to_ascii(src: &[u16], dst: &mut [u8]) -> usize {
+    let len = src.len();
+    assert!(
+        dst.len() >= len,
+        "Destination must not be shorter than the source."
+    );
+    let stopped = unsafe { basic_latin_to_ascii(src.as_ptr(), dst.as_mut_ptr(), len) };
+    if let Some((_, narrowed)) = stopped {
+        return narrowed;
+    }
+    len
+}
+
+// Any copyright to the test code below this comment is dedicated to the
+// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_is_ascii_success() {
+        // The 128 ASCII byte values in ascending order.
+        let src: Vec<u8> = (0..128usize).map(|value| value as u8).collect();
+        let mut start = 0;
+        // Every suffix of the all-ASCII buffer must be accepted.
+        while start < src.len() {
+            assert!(is_ascii(&src[start..]));
+            start += 1;
+        }
+    }
+
+    #[test]
+    fn test_is_ascii_fail() {
+        let mut src: Vec<u8> = (0..128usize).map(|value| value as u8).collect();
+        let total = src.len();
+        for start in 0..total {
+            let suffix = &mut src[start..];
+            // Grow a run of 0xA0 from the front of the suffix; a non-ASCII byte
+            // anywhere in the slice must make the check fail. The 0xA0 bytes
+            // persist across iterations of the outer loop.
+            for poisoned in 0..suffix.len() {
+                suffix[poisoned] = 0xA0;
+                assert!(!is_ascii(suffix));
+            }
+        }
+    }
+
+    #[test]
+    fn test_is_basic_latin_success() {
+        let mut src: Vec<u16> = Vec::with_capacity(128);
+        src.resize(128, 0);
+        for i in 0..src.len() {
+            src[i] = i as u16;
+        }
+        for i in 0..src.len() {
+            assert!(is_basic_latin(&src[i..]));
+        }
+    }
+
+    #[test]
+    fn test_is_basic_latin_fail() {
+        let mut src: Vec<u16> = Vec::with_capacity(128);
+        src.resize(128, 0);
+        for i in 0..src.len() {
+            src[i] = i as u16;
+        }
+        for i in 0..src.len() {
+            let tail = &mut src[i..];
+            for j in 0..tail.len() {
+                tail[j] = 0xA0;
+                assert!(!is_basic_latin(tail));
+            }
+        }
+    }
+
+    #[test]
+    fn test_is_utf16_latin1_success() {
+        let mut src: Vec<u16> = Vec::with_capacity(256);
+        src.resize(256, 0);
+        for i in 0..src.len() {
+            src[i] = i as u16;
+        }
+        for i in 0..src.len() {
+            assert!(is_utf16_latin1(&src[i..]));
+            assert_eq!(
+                check_utf16_for_latin1_and_bidi(&src[i..]),
+                Latin1Bidi::Latin1
+            );
+        }
+    }
+
+    #[test]
+    fn test_is_utf16_latin1_fail() {
+        let mut src: Vec<u16> = Vec::with_capacity(256);
+        src.resize(256, 0);
+        for i in 0..src.len() {
+            src[i] = i as u16;
+        }
+        for i in 0..src.len() {
+            let tail = &mut src[i..];
+            for j in 0..tail.len() {
+                tail[j] = 0x100 + j as u16;
+                assert!(!is_utf16_latin1(tail));
+                assert_ne!(check_utf16_for_latin1_and_bidi(tail), Latin1Bidi::Latin1);
+            }
+        }
+    }
+
+    #[test]
+    fn test_is_str_latin1_success() {
+        let mut src: Vec<u16> = Vec::with_capacity(256);
+        src.resize(256, 0);
+        for i in 0..src.len() {
+            src[i] = i as u16;
+        }
+        for i in 0..src.len() {
+            let s = String::from_utf16(&src[i..]).unwrap();
+            assert!(is_str_latin1(&s[..]));
+            assert_eq!(check_str_for_latin1_and_bidi(&s[..]), Latin1Bidi::Latin1);
+        }
+    }
+
+    #[test]
+    fn test_is_str_latin1_fail() {
+        let mut src: Vec<u16> = Vec::with_capacity(256);
+        src.resize(256, 0);
+        for i in 0..src.len() {
+            src[i] = i as u16;
+        }
+        for i in 0..src.len() {
+            let tail = &mut src[i..];
+            for j in 0..tail.len() {
+                tail[j] = 0x100 + j as u16;
+                let s = String::from_utf16(tail).unwrap();
+                assert!(!is_str_latin1(&s[..]));
+                assert_ne!(check_str_for_latin1_and_bidi(&s[..]), Latin1Bidi::Latin1);
+            }
+        }
+    }
+
+    #[test]
+    fn test_is_utf8_latin1_success() {
+        let mut src: Vec<u16> = Vec::with_capacity(256);
+        src.resize(256, 0);
+        for i in 0..src.len() {
+            src[i] = i as u16;
+        }
+        for i in 0..src.len() {
+            let s = String::from_utf16(&src[i..]).unwrap();
+            assert!(is_utf8_latin1(s.as_bytes()));
+            assert_eq!(
+                check_utf8_for_latin1_and_bidi(s.as_bytes()),
+                Latin1Bidi::Latin1
+            );
+        }
+    }
+
+    #[test]
+    fn test_is_utf8_latin1_fail() {
+        let mut src: Vec<u16> = Vec::with_capacity(256);
+        src.resize(256, 0);
+        for i in 0..src.len() {
+            src[i] = i as u16;
+        }
+        for i in 0..src.len() {
+            let tail = &mut src[i..];
+            for j in 0..tail.len() {
+                tail[j] = 0x100 + j as u16;
+                let s = String::from_utf16(tail).unwrap();
+                assert!(!is_utf8_latin1(s.as_bytes()));
+                assert_ne!(
+                    check_utf8_for_latin1_and_bidi(s.as_bytes()),
+                    Latin1Bidi::Latin1
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_is_utf8_latin1_invalid() {
+        assert!(!is_utf8_latin1(b"\xC3"));
+        assert!(!is_utf8_latin1(b"a\xC3"));
+        assert!(!is_utf8_latin1(b"\xFF"));
+        assert!(!is_utf8_latin1(b"a\xFF"));
+        assert!(!is_utf8_latin1(b"\xC3\xFF"));
+        assert!(!is_utf8_latin1(b"a\xC3\xFF"));
+    }
+
+    #[test]
+    fn test_convert_utf8_to_utf16() {
+        let src = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
+        let mut dst: Vec<u16> = Vec::with_capacity(src.len() + 1);
+        dst.resize(src.len() + 1, 0);
+        let len = convert_utf8_to_utf16(src.as_bytes(), &mut dst[..]);
+        dst.truncate(len);
+        let reference: Vec<u16> = src.encode_utf16().collect();
+        assert_eq!(dst, reference);
+    }
+
+    #[test]
+    fn test_convert_str_to_utf16() {
+        let src = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
+        let mut dst: Vec<u16> = Vec::with_capacity(src.len());
+        dst.resize(src.len(), 0);
+        let len = convert_str_to_utf16(src, &mut dst[..]);
+        dst.truncate(len);
+        let reference: Vec<u16> = src.encode_utf16().collect();
+        assert_eq!(dst, reference);
+    }
+
+    #[test]
+    fn test_convert_utf16_to_utf8() {
+        let reference = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
+        let src: Vec<u16> = reference.encode_utf16().collect();
+        let mut dst: Vec<u8> = Vec::with_capacity(src.len() * 3 + 1);
+        dst.resize(src.len() * 3 + 1, 0);
+        let len = convert_utf16_to_utf8(&src[..], &mut dst[..]);
+        dst.truncate(len);
+        assert_eq!(dst, reference.as_bytes());
+    }
+
+    #[test]
+    fn test_convert_latin1_to_utf16() {
+        let mut src: Vec<u8> = Vec::with_capacity(256);
+        src.resize(256, 0);
+        let mut reference: Vec<u16> = Vec::with_capacity(256);
+        reference.resize(256, 0);
+        for i in 0..256 {
+            src[i] = i as u8;
+            reference[i] = i as u16;
+        }
+        let mut dst: Vec<u16> = Vec::with_capacity(src.len());
+        dst.resize(src.len(), 0);
+        convert_latin1_to_utf16(&src[..], &mut dst[..]);
+        assert_eq!(dst, reference);
+    }
+
+    #[test]
+    fn test_convert_latin1_to_utf8() {
+        let mut src: Vec<u8> = Vec::with_capacity(256);
+        src.resize(256, 0);
+        let mut reference: Vec<u16> = Vec::with_capacity(256);
+        reference.resize(256, 0);
+        for i in 0..256 {
+            src[i] = i as u8;
+            reference[i] = i as u16;
+        }
+        let s = String::from_utf16(&reference[..]).unwrap();
+        let mut dst: Vec<u8> = Vec::with_capacity(src.len() * 2);
+        dst.resize(src.len() * 2, 0);
+        let len = convert_latin1_to_utf8(&src[..], &mut dst[..]);
+        dst.truncate(len);
+        assert_eq!(&dst[..], s.as_bytes());
+    }
+
+    #[test]
+    fn test_convert_utf8_to_latin1_lossy() {
+        let mut reference: Vec<u8> = Vec::with_capacity(256);
+        reference.resize(256, 0);
+        let mut src16: Vec<u16> = Vec::with_capacity(256);
+        src16.resize(256, 0);
+        for i in 0..256 {
+            src16[i] = i as u16;
+            reference[i] = i as u8;
+        }
+        let src = String::from_utf16(&src16[..]).unwrap();
+        let mut dst: Vec<u8> = Vec::with_capacity(src.len());
+        dst.resize(src.len(), 0);
+        let len = convert_utf8_to_latin1_lossy(src.as_bytes(), &mut dst[..]);
+        dst.truncate(len);
+        assert_eq!(dst, reference);
+    }
+
+    #[test]
+    fn test_convert_utf16_to_latin1_lossy() {
+        let mut src: Vec<u16> = Vec::with_capacity(256);
+        src.resize(256, 0);
+        let mut reference: Vec<u8> = Vec::with_capacity(256);
+        reference.resize(256, 0);
+        for i in 0..256 {
+            src[i] = i as u16;
+            reference[i] = i as u8;
+        }
+        let mut dst: Vec<u8> = Vec::with_capacity(src.len());
+        dst.resize(src.len(), 0);
+        convert_utf16_to_latin1_lossy(&src[..], &mut dst[..]);
+        assert_eq!(dst, reference);
+    }
+
+    #[test]
+    fn test_utf16_valid_up_to() {
+        let valid = vec![0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
+                         0x2603u16, 0xD83Du16, 0xDCA9u16, 0x00B6u16];
+        assert_eq!(utf16_valid_up_to(&valid[..]), 16); // well-formed pair: whole length
+        let lone_high = vec![0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
+                             0u16, 0u16, 0x2603u16, 0xD83Du16, 0x00B6u16];
+        assert_eq!(utf16_valid_up_to(&lone_high[..]), 14); // high surrogate, no low after it
+        let lone_low = vec![0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
+                            0u16, 0u16, 0x2603u16, 0xDCA9u16, 0x00B6u16];
+        assert_eq!(utf16_valid_up_to(&lone_low[..]), 14); // low surrogate, no high before it
+        let lone_high_at_end = vec![0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
+                                    0u16, 0u16, 0u16, 0x2603u16, 0x00B6u16, 0xD83Du16];
+        assert_eq!(utf16_valid_up_to(&lone_high_at_end[..]), 15); // high surrogate ends input
+    }
+
+    #[test]
+    fn test_ensure_utf16_validity() {
+        let mut src = vec![0u16, 0xD83Du16, 0u16, 0u16, 0u16, 0xD83Du16, 0xDCA9u16, 0u16, 0u16,
+                           0u16, 0u16, 0u16, 0u16, 0xDCA9u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
+                           0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16];
+        // Expected: lone high surrogate at 1 and lone low surrogate at 13 replaced.
+        let mut reference = src.clone();
+        reference[1] = 0xFFFDu16;
+        reference[13] = 0xFFFDu16;
+        ensure_utf16_validity(&mut src[..]);
+        assert_eq!(src, reference);
+    }
+
+    #[test]
+    fn test_is_char_bidi() {
+        assert!(!is_char_bidi('a'));
+        assert!(!is_char_bidi('\u{03B1}'));
+        assert!(!is_char_bidi('\u{3041}'));
+        assert!(!is_char_bidi('\u{1F4A9}'));
+        assert!(!is_char_bidi('\u{FE00}'));
+        assert!(!is_char_bidi('\u{202C}'));
+        assert!(is_char_bidi('\u{0590}'));
+        assert!(is_char_bidi('\u{08FF}'));
+        assert!(is_char_bidi('\u{061C}'));
+        assert!(is_char_bidi('\u{FB50}'));
+        assert!(is_char_bidi('\u{FDFF}'));
+        assert!(is_char_bidi('\u{FE70}'));
+        assert!(is_char_bidi('\u{FEFF}'));
+        assert!(is_char_bidi('\u{200F}'));
+        assert!(is_char_bidi('\u{202B}'));
+        assert!(is_char_bidi('\u{202E}'));
+        assert!(is_char_bidi('\u{2067}'));
+        assert!(is_char_bidi('\u{10800}'));
+        assert!(is_char_bidi('\u{10FFF}'));
+        assert!(is_char_bidi('\u{1E800}'));
+        assert!(is_char_bidi('\u{1EFFF}'));
+    }
+
+    #[test]
+    fn test_is_utf16_code_unit_bidi() {
+        assert!(!is_utf16_code_unit_bidi(0x0062));
+        assert!(!is_utf16_code_unit_bidi(0x03B1));
+        assert!(!is_utf16_code_unit_bidi(0x3041));
+        assert!(!is_utf16_code_unit_bidi(0xD801));
+        assert!(!is_utf16_code_unit_bidi(0xFE00));
+        assert!(!is_utf16_code_unit_bidi(0x202C));
+        assert!(is_utf16_code_unit_bidi(0x0590));
+        assert!(is_utf16_code_unit_bidi(0x08FF));
+        assert!(is_utf16_code_unit_bidi(0x061C));
+        assert!(is_utf16_code_unit_bidi(0xFB50));
+        assert!(is_utf16_code_unit_bidi(0xFDFF));
+        assert!(is_utf16_code_unit_bidi(0xFE70));
+        assert!(is_utf16_code_unit_bidi(0xFEFF));
+        assert!(is_utf16_code_unit_bidi(0x200F));
+        assert!(is_utf16_code_unit_bidi(0x202B));
+        assert!(is_utf16_code_unit_bidi(0x202E));
+        assert!(is_utf16_code_unit_bidi(0x2067));
+        assert!(is_utf16_code_unit_bidi(0xD802));
+        assert!(is_utf16_code_unit_bidi(0xD803));
+        assert!(is_utf16_code_unit_bidi(0xD83A));
+        assert!(is_utf16_code_unit_bidi(0xD83B));
+    }
+
+    #[test]
+    fn test_is_str_bidi() {
+        assert!(!is_str_bidi("abcdefghijklmnopaabcdefghijklmnop"));
+        assert!(!is_str_bidi("abcdefghijklmnop\u{03B1}abcdefghijklmnop"));
+        assert!(!is_str_bidi("abcdefghijklmnop\u{3041}abcdefghijklmnop"));
+        assert!(!is_str_bidi("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop"));
+        assert!(!is_str_bidi("abcdefghijklmnop\u{FE00}abcdefghijklmnop"));
+        assert!(!is_str_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{08FF}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{061C}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{FB50}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{FDFF}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{FE70}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{200F}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{202B}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{202E}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{2067}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{10800}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{10FFF}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{1E800}abcdefghijklmnop"));
+        assert!(is_str_bidi("abcdefghijklmnop\u{1EFFF}abcdefghijklmnop"));
+    }
+
+    #[test]
+    fn test_is_utf8_bidi() {
+        assert!(!is_utf8_bidi("abcdefghijklmnopaabcdefghijklmnop".as_bytes()));
+        assert!(!is_utf8_bidi("abcdefghijklmnop\u{03B1}abcdefghijklmnop".as_bytes()));
+        assert!(!is_utf8_bidi("abcdefghijklmnop\u{3041}abcdefghijklmnop".as_bytes()));
+        assert!(!is_utf8_bidi("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop".as_bytes()));
+        assert!(!is_utf8_bidi("abcdefghijklmnop\u{FE00}abcdefghijklmnop".as_bytes()));
+        assert!(!is_utf8_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{08FF}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{061C}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{FB50}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{FDFF}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{FE70}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{200F}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{202B}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{202E}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{2067}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{10800}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{10FFF}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{1E800}abcdefghijklmnop".as_bytes()));
+        assert!(is_utf8_bidi("abcdefghijklmnop\u{1EFFF}abcdefghijklmnop".as_bytes()));
+    }
+
+    #[test]
+    fn test_is_utf16_bidi() {
+        assert!(
+            !is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0062, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            !is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x03B1, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            !is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x3041, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            !is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD801, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            !is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFE00, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            !is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202C, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x08FF, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x061C, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB50, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFDFF, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFE70, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x200F, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202B, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202E, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x2067, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD802, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD803, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD83A, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD83B, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            )
+        );
+
+        assert!(
+            is_utf16_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x3041, 0x62, 0x63,
+                  0x64, 0x65, 0x66, 0x67, 0x68, 0x69]
+            )
+        );
+    }
+
+    #[test]
+    fn test_check_str_for_latin1_and_bidi() {
+        assert_ne!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnopaabcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{03B1}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{3041}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FE00}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{08FF}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{061C}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FB50}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FDFF}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FE70}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{200F}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202B}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202E}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{2067}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{10800}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{10FFF}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{1E800}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_str_for_latin1_and_bidi("abcdefghijklmnop\u{1EFFF}abcdefghijklmnop"),
+            Latin1Bidi::Bidi
+        );
+    }
+
+    #[test]
+    fn test_check_utf8_for_latin1_and_bidi() {
+        assert_ne!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnopaabcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{03B1}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{3041}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FE00}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{08FF}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{061C}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FB50}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FDFF}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FE70}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{200F}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{202B}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{202E}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{2067}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{10800}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{10FFF}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{1E800}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!(
+            check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{1EFFF}abcdefghijklmnop".as_bytes()),
+            Latin1Bidi::Bidi
+        );
+    }
+
+    #[test]
+    fn test_check_utf16_for_latin1_and_bidi() { // Each input is ASCII (0x62..0x69) except for the probe code unit(s) starting at index 8.
+        assert_ne!( // Probe 0x0062: ASCII only, so the result must not be Bidi.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0062, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!( // Probe 0x03B1: above Latin-1 but below 0x0590, the first RTL code unit.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x03B1, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!( // Probe 0x3041: lies between the RTL ranges (above 0x2067, below 0xD802).
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x3041, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!( // Probe 0xD801: a surrogate, but not one of the RTL ones (0xD802/0xD803/0xD83A/0xD83B).
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD801, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!( // Probe 0xFE00: in the gap between the 0xFB50...0xFDFF and 0xFE70...0xFEFF ranges.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFE00, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_ne!( // Probe 0x202C: neighbours 0x202B but is not itself in the RTL set.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202C, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0x0590: first code unit of the 0x0590...0x08FF range.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0x08FF: last code unit of the 0x0590...0x08FF range.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x08FF, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0x061C: interior of the 0x0590...0x08FF range.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x061C, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0xFB50: first code unit of the 0xFB50...0xFDFF range.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB50, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0xFDFF: last code unit of the 0xFB50...0xFDFF range.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFDFF, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0xFE70: first code unit of the 0xFE70...0xFEFF range.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFE70, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0xFEFF: last code unit of the 0xFE70...0xFEFF range.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0x200F: individually listed RTL code unit.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x200F, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0x202B: individually listed RTL code unit.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202B, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0x202E: individually listed RTL code unit.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202E, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0x2067: individually listed RTL code unit.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x2067, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0xD802: an RTL high surrogate alone (no low surrogate follows) already yields Bidi.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD802, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0xD803: unpaired RTL high surrogate.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD803, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0xD83A: unpaired RTL high surrogate.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD83A, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+        assert_eq!( // Probe 0xD83B: unpaired RTL high surrogate.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD83B, 0x62, 0x63, 0x64, 0x65,
+                  0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+
+        assert_eq!( // Two probes: RTL 0x0590 followed by non-RTL, non-Latin-1 0x3041 must still yield Bidi.
+            check_utf16_for_latin1_and_bidi(
+                &[0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x3041, 0x62, 0x63,
+                  0x64, 0x65, 0x66, 0x67, 0x68, 0x69]
+            ),
+            Latin1Bidi::Bidi
+        );
+    }
+
+    #[inline(always)]
+    pub fn reference_is_char_bidi(c: char) -> bool { // Plain match-based oracle; the exhaustive tests in this module compare the real checks against it.
+        match c {
+            '\u{0590}'...'\u{08FF}' | // inclusive range; U+0590 is where the Hebrew block starts
+            '\u{FB50}'...'\u{FDFF}' | // presumably Arabic Presentation Forms-A -- confirm against the Unicode charts
+            '\u{FE70}'...'\u{FEFF}' | // presumably Arabic Presentation Forms-B; note U+FEFF itself is included
+            '\u{10800}'...'\u{10FFF}' | // supplementary range whose UTF-16 high surrogates are 0xD802 and 0xD803
+            '\u{1E800}'...'\u{1EFFF}' | // supplementary range whose UTF-16 high surrogates are 0xD83A and 0xD83B
+            '\u{200F}' | // RIGHT-TO-LEFT MARK
+            '\u{202B}' | // RIGHT-TO-LEFT EMBEDDING
+            '\u{202E}' | // RIGHT-TO-LEFT OVERRIDE
+            '\u{2067}' => true, // RIGHT-TO-LEFT ISOLATE
+            _ => false, // everything else is treated as not requiring bidi handling
+        }
+    }
+
+    #[inline(always)]
+    pub fn reference_is_utf16_code_unit_bidi(u: u16) -> bool { // UTF-16 code unit counterpart of reference_is_char_bidi, used as the test oracle.
+        match u {
+            0x0590...0x08FF | 0xFB50...0xFDFF | 0xFE70...0xFEFF | 0xD802 | 0xD803 | 0xD83A | // BMP ranges, then high surrogates of U+10800..U+10FFF (0xD802/0xD803) and U+1E800..U+1EFFF (0xD83A/...
+            0xD83B | 0x200F | 0x202B | 0x202E | 0x2067 => true, // .../0xD83B), then the four individually listed right-to-left control code units
+            _ => false, // includes every low surrogate and all other high surrogates
+        }
+    }
+
+    #[test]
+    fn test_is_char_bidi_thoroughly() { // Checks is_char_bidi against the reference for every value in 0..0xD800 and 0xE000..0x110000.
+        for i in 0..0xD800u32 { // everything below the surrogate range
+            let c: char = unsafe { ::std::mem::transmute(i) }; // the loop bounds keep i out of 0xD800..0xE000, so i is a valid char value
+            assert_eq!(is_char_bidi(c), reference_is_char_bidi(c));
+        }
+        for i in 0xE000..0x110000u32 { // everything above the surrogate range, up to and including U+10FFFF
+            let c: char = unsafe { ::std::mem::transmute(i) }; // i < 0x110000 and not a surrogate, so i is a valid char value
+            assert_eq!(is_char_bidi(c), reference_is_char_bidi(c));
+        }
+    }
+
+    #[test]
+    fn test_is_utf16_code_unit_bidi_thoroughly() { // Checks is_utf16_code_unit_bidi against the reference for all 65536 u16 values.
+        for i in 0..0x10000u32 { // iterate as u32 so the exclusive upper bound 0x10000 is representable
+            let u = i as u16; // lossless: i is always below 0x10000
+            assert_eq!(
+                is_utf16_code_unit_bidi(u),
+                reference_is_utf16_code_unit_bidi(u) // surrogates are included here, unlike in the char-based tests
+            );
+        }
+    }
+
+    #[test]
+    fn test_is_str_bidi_thoroughly() { // Checks is_str_bidi on every single-character string against reference_is_char_bidi.
+        let mut buf = [0; 4]; // scratch space for encode_utf8; 4 bytes is the longest UTF-8 sequence
+        for i in 0..0xD800u32 { // values below the surrogate range
+            let c: char = unsafe { ::std::mem::transmute(i) }; // the loop bounds exclude surrogates, so i is a valid char value
+            assert_eq!(
+                is_str_bidi(c.encode_utf8(&mut buf[..])), // only the encoded prefix of buf is passed as &str
+                reference_is_char_bidi(c)
+            );
+        }
+        for i in 0xE000..0x110000u32 { // values above the surrogate range
+            let c: char = unsafe { ::std::mem::transmute(i) }; // i < 0x110000 and not a surrogate
+            assert_eq!(
+                is_str_bidi(c.encode_utf8(&mut buf[..])),
+                reference_is_char_bidi(c)
+            );
+        }
+    }
+
+    #[test]
+    fn test_is_utf8_bidi_thoroughly() { // Checks is_utf8_bidi for every char, both as a bare sequence and inside a zero-padded 8-byte buffer.
+        let mut buf = [0; 8]; // longer than any UTF-8 sequence so the padded check always has trailing zero bytes
+        for i in 0..0xD800u32 { // values below the surrogate range
+            let c: char = unsafe { ::std::mem::transmute(i) }; // the loop bounds exclude surrogates, so i is a valid char value
+            let expect = reference_is_char_bidi(c);
+            { // scope limits the borrows of buf taken while encoding and clearing
+                let len = {
+                    let bytes = c.encode_utf8(&mut buf[..]).as_bytes();
+                    assert_eq!(is_utf8_bidi(bytes), expect); // first check: exactly the encoded bytes
+                    bytes.len()
+                };
+                {
+                    let tail = &mut buf[len..]; // bytes after the encoded char may hold leftovers from an earlier, longer encoding
+                    for b in tail.iter_mut() {
+                        *b = 0;
+                    }
+                }
+            }
+            assert_eq!(is_utf8_bidi(&buf[..]), expect); // second check: the char followed by zero bytes up to 8 bytes total
+        }
+        for i in 0xE000..0x110000u32 { // values above the surrogate range; body mirrors the first loop
+            let c: char = unsafe { ::std::mem::transmute(i) }; // i < 0x110000 and not a surrogate
+            let expect = reference_is_char_bidi(c);
+            {
+                let len = {
+                    let bytes = c.encode_utf8(&mut buf[..]).as_bytes();
+                    assert_eq!(is_utf8_bidi(bytes), expect); // bare encoded bytes
+                    bytes.len()
+                };
+                {
+                    let tail = &mut buf[len..]; // clear whatever follows the freshly encoded char
+                    for b in tail.iter_mut() {
+                        *b = 0;
+                    }
+                }
+            }
+            assert_eq!(is_utf8_bidi(&buf[..]), expect); // zero-padded 8-byte buffer
+        }
+    }
+
+    #[test]
+    fn test_is_utf8_bidi_edge_cases() { // Pins behavior just below U+0590 and for input that ends in a dangling lead byte.
+        assert!(!is_utf8_bidi(b"\xD5\xBF\x61")); // D5 BF decodes to U+057F (below U+0590), followed by ASCII 'a'
+        assert!(!is_utf8_bidi(b"\xD6\x80\x61")); // D6 80 decodes to U+0580 (below U+0590), followed by ASCII 'a'
+        assert!(!is_utf8_bidi(b"abc")); // pure ASCII
+        assert!(is_utf8_bidi(b"\xD5\xBF\xC2")); // same non-RTL char, but the trailing C2 is a lead byte with no continuation: expected true
+        assert!(is_utf8_bidi(b"\xD6\x80\xC2")); // likewise: the truncated sequence makes the result true
+        assert!(is_utf8_bidi(b"ab\xC2")); // ASCII followed by a truncated sequence is also expected to report true
+    }
+}
diff --git a/third_party/rust/encoding_rs/src/simd_funcs.rs b/third_party/rust/encoding_rs/src/simd_funcs.rs
index 1614cdb367a3..867a3f1cf161 100644
--- a/third_party/rust/encoding_rs/src/simd_funcs.rs
+++ b/third_party/rust/encoding_rs/src/simd_funcs.rs
@@ -21,6 +21,7 @@ pub unsafe fn load16_unaligned(ptr: *const u8) -> u8x16 {
     simd
 }
 
+#[allow(dead_code)]
 #[inline(always)]
 pub unsafe fn load16_aligned(ptr: *const u8) -> u8x16 {
     *(ptr as *const u8x16)
@@ -31,6 +32,7 @@ pub unsafe fn store16_unaligned(ptr: *mut u8, s: u8x16) {
     ::std::ptr::copy_nonoverlapping(&s as *const u8x16 as *const u8, ptr, 16);
 }
 
+#[allow(dead_code)]
 #[inline(always)]
 pub unsafe fn store16_aligned(ptr: *mut u8, s: u8x16) {
     *(ptr as *mut u8x16) = s;
@@ -43,6 +45,7 @@ pub unsafe fn load8_unaligned(ptr: *const u16) -> u16x8 {
     simd
 }
 
+#[allow(dead_code)]
 #[inline(always)]
 pub unsafe fn load8_aligned(ptr: *const u16) -> u16x8 {
     *(ptr as *const u16x8)
@@ -53,6 +56,7 @@ pub unsafe fn store8_unaligned(ptr: *mut u16, s: u16x8) {
     ::std::ptr::copy_nonoverlapping(&s as *const u16x8 as *const u8, ptr as *mut u8, 16);
 }
 
+#[allow(dead_code)]
 #[inline(always)]
 pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) {
     *(ptr as *mut u16x8) = s;
@@ -89,7 +93,7 @@ cfg_if! {
 cfg_if! {
     if #[cfg(target_feature = "sse2")] {
         #[inline(always)]
-        pub fn is_ascii(s: u8x16) -> bool {
+        pub fn simd_is_ascii(s: u8x16) -> bool {
             unsafe {
                 let signed: i8x16 = ::std::mem::transmute_copy(&s);
                 x86_mm_movemask_epi8(signed) == 0
@@ -101,16 +105,42 @@ cfg_if! {
         }
 
         #[inline(always)]
-        pub fn is_ascii(s: u8x16) -> bool {
+        pub fn simd_is_ascii(s: u8x16) -> bool {
             unsafe {
                 aarch64_vmaxvq_u8(s) < 0x80
             }
         }
     } else {
         #[inline(always)]
-        pub fn is_ascii(s: u8x16) -> bool {
-            let highest_ascii = u8x16::splat(0x7F);
-            !s.gt(highest_ascii).any()
+        pub fn simd_is_ascii(s: u8x16) -> bool { // Fallback branch of the cfg_if: true iff every one of the 16 bytes is below 0x80.
+            let above_ascii = u8x16::splat(0x80); // 0x80 (the first non-ASCII byte value) in every lane
+            s.lt(above_ascii).all() // logically the same predicate as the removed `!s.gt(0x7F).any()` form
+        }
+    }
+}
+
+cfg_if! { // Picks the per-target implementation of simd_is_str_latin1.
+    if #[cfg(target_feature = "sse2")] {
+        #[inline(always)]
+        pub fn simd_is_str_latin1(s: u8x16) -> bool { // True iff all 16 bytes are below 0xC4.
+            if simd_is_ascii(s) { // fast path: an all-ASCII vector trivially passes
+                return true;
+            }
+            let above_str_latin1 = u8x16::splat(0xC4); // in UTF-8, lead bytes 0xC2/0xC3 cover U+0080..U+00FF; 0xC4 is the first lead byte for U+0100
+            s.lt(above_str_latin1).all()
+        }
+    } else if #[cfg(target_arch = "aarch64")]{
+        #[inline(always)]
+        pub fn simd_is_str_latin1(s: u8x16) -> bool { // Same predicate via one reduction instead of compare-and-all.
+            unsafe {
+                aarch64_vmaxvq_u8(s) < 0xC4 // presumably the maximum across all lanes -- confirm against the intrinsic's declaration
+            }
+        }
+    } else {
+        #[inline(always)]
+        pub fn simd_is_str_latin1(s: u8x16) -> bool { // Generic fallback without the ASCII fast path.
+            let above_str_latin1 = u8x16::splat(0xC4); // same threshold as the SSE2 branch
+            s.lt(above_str_latin1).all()
         }
     }
 }
@@ -122,20 +152,107 @@ cfg_if! {
         }
 
         #[inline(always)]
-        pub fn is_basic_latin(s: u16x8) -> bool {
+        pub fn simd_is_basic_latin(s: u16x8) -> bool { // aarch64 branch: true iff the reduced value of the eight u16 lanes is below 0x80 (presumably a lane-wise max -- confirm).
             unsafe {
                 aarch64_vmaxvq_u16(s) < 0x80
             }
         }
+
+        #[inline(always)]
+        pub fn simd_is_latin1(s: u16x8) -> bool { // aarch64 branch: Latin-1 test for eight UTF-16 code units.
+            unsafe {
+                aarch64_vmaxvq_u16(s) < 0x100 // presumably the maximum across lanes; below 0x100 would mean every code unit is U+0000..U+00FF -- confirm
+            }
+        }
     } else {
         #[inline(always)]
-        pub fn is_basic_latin(s: u16x8) -> bool {
-            let highest_ascii = u16x8::splat(0x7F);
-            !s.gt(highest_ascii).any()
+        pub fn simd_is_basic_latin(s: u16x8) -> bool { // Non-aarch64 branch: true iff all eight UTF-16 code units are below 0x80.
+            let above_ascii = u16x8::splat(0x80); // 0x80 in every lane
+            s.lt(above_ascii).all() // logically the same predicate as the removed `!s.gt(0x7F).any()` form
+        }
+
+        #[inline(always)]
+        pub fn simd_is_latin1(s: u16x8) -> bool { // Non-aarch64 branch: true iff no UTF-16 code unit exceeds 0xFF.
+            // For some reason, on SSE2 this formulation
+            // seems faster in this case while the above
+            // function is better the other way round...
+            let highest_latin1 = u16x8::splat(0xFF); // 0xFF in every lane
+            !s.gt(highest_latin1).any() // "no lane greater than 0xFF", deliberately not written as lt(0x100).all()
         }
     }
 }
 
+#[inline(always)]
+pub fn contains_surrogates(s: u16x8) -> bool { // True iff at least one of the eight code units is in 0xD800..=0xDFFF.
+    let mask = u16x8::splat(0xF800); // keeps the top five bits of each lane
+    let surrogate_bits = u16x8::splat(0xD800); // top-five-bit pattern shared by all high and low surrogates
+    (s & mask).eq(surrogate_bits).any() // does not distinguish paired from unpaired surrogates
+}
+
+cfg_if! { // Defines two early-return helper macros; on each target exactly one of them expands to code, the other to nothing.
+    if #[cfg(target_arch = "aarch64")]{
+        macro_rules! aarch64_return_false_if_below_hebrew { // aarch64: early `return false` from the expanding function
+            ($s:ident) => ({
+                unsafe {
+                    if aarch64_vmaxvq_u16($s) < 0x0590 { // 0x0590 is the lowest code unit the bidi check treats as RTL
+                        return false;
+                    }
+                }
+            })
+        }
+
+        macro_rules! non_aarch64_return_false_if_all { // aarch64: intentionally expands to nothing
+            ($s:ident) => ()
+        }
+    } else {
+        macro_rules! aarch64_return_false_if_below_hebrew { // other targets: intentionally expands to nothing
+            ($s:ident) => ()
+        }
+
+        macro_rules! non_aarch64_return_false_if_all { // other targets: early `return false` when every lane of the boolean vector is set
+            ($s:ident) => ({
+                if $s.all() { // $s is expected to be a comparison result, not the raw code units
+                    return false;
+                }
+            })
+        }
+    }
+}
+
+macro_rules! in_range16x8 { // Lane-wise test for $start <= lane < $end (half-open) using a single unsigned comparison.
+    ($s:ident, $start:expr, $end:expr) => ({
+        // SIMD sub is wrapping
+        ($s - u16x8::splat($start)).lt(u16x8::splat($end - $start)) // lanes below $start wrap around to large values and so fail the lt
+    })
+}
+
+#[inline(always)]
+pub fn is_u16x8_bidi(s: u16x8) -> bool { // True iff at least one of the eight UTF-16 code units is in the RTL set checked at the end of this function.
+    // We try to first quickly refute the RTLness of the vector. If that
+    // fails, we do the real RTL check, so in that case we end up wasting
+    // the work for the up-front quick checks. Even the quick-check is
+    // two-fold in order to return `false` ASAP if everything is below
+    // Hebrew.
+
+    aarch64_return_false_if_below_hebrew!(s); // expands to code only on aarch64; may return false here
+
+    let below_hebrew = s.lt(u16x8::splat(0x0590)); // per-lane flag: code unit is below 0x0590
+
+    non_aarch64_return_false_if_all!(below_hebrew); // expands to code only off aarch64; may return false here
+
+    if (below_hebrew | in_range16x8!(s, 0x0900, 0x200F) | in_range16x8!(s, 0x2068, 0xD802)).all() { // every lane is in one of three spans that contain no RTL code unit
+        return false;
+    }
+
+    // Quick refutation failed. Let's do the full check.
+
+    (in_range16x8!(s, 0x0590, 0x0900) | in_range16x8!(s, 0xFB50, 0xFE00) | // half-open ranges, i.e. 0x0590..=0x08FF and 0xFB50..=0xFDFF
+     in_range16x8!(s, 0xFE70, 0xFF00) | in_range16x8!(s, 0xD802, 0xD804) | // 0xFE70..=0xFEFF and the surrogates 0xD802, 0xD803
+     in_range16x8!(s, 0xD83A, 0xD83C) | s.eq(u16x8::splat(0x200F)) | // the surrogates 0xD83A, 0xD83B, then single code units
+     s.eq(u16x8::splat(0x202B)) | s.eq(u16x8::splat(0x202E)) | s.eq(u16x8::splat(0x2067)))
+            .any() // one matching lane is enough
+}
+
 #[inline(always)]
 pub fn simd_unpack(s: u8x16) -> (u16x8, u16x8) {
     unsafe {
@@ -206,7 +323,7 @@ mod tests {
     }
 
     #[test]
-    fn test_is_basic_latin_success() {
+    fn test_simd_is_basic_latin_success() {
         let ascii: [u8; 16] = [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x70, 0x71,
                                0x72, 0x73, 0x74, 0x75, 0x76];
         let basic_latin: [u16; 16] = [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x70,
@@ -216,7 +333,7 @@ mod tests {
         let mut vec = Vec::with_capacity(16);
         vec.resize(16, 0u8);
         let ptr = vec.as_mut_ptr();
-        assert!(is_basic_latin(first | second));
+        assert!(simd_is_basic_latin(first | second));
         unsafe {
             store16_unaligned(ptr, simd_pack(first, second));
         }
@@ -224,46 +341,46 @@ mod tests {
     }
 
     #[test]
-    fn test_is_basic_latin_c0() {
+    fn test_simd_is_basic_latin_c0() { // Renamed with the function under test; the input contains 0x81, which is >= 0x80 (NOTE(review): 0x81 is a C1 value despite the `c0` name).
         let input: [u16; 16] = [0x61, 0x62, 0x63, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x70, 0x71,
                                 0x72, 0x73, 0x74, 0x75, 0x76];
         let first = unsafe { load8_unaligned(input.as_ptr()) };
         let second = unsafe { load8_unaligned(input.as_ptr().offset(8)) };
-        assert!(!is_basic_latin(first | second));
+        assert!(!simd_is_basic_latin(first | second)); // OR-ing the halves keeps any bit at or above 0x80, so one check covers all 16 code units
     }
 
     #[test]
-    fn test_is_basic_latin_0fff() {
+    fn test_simd_is_basic_latin_0fff() { // Renamed with the function under test; the input contains 0x0FFF, whose low byte alone is not ASCII either.
         let input: [u16; 16] = [0x61, 0x62, 0x63, 0x0FFF, 0x65, 0x66, 0x67, 0x68, 0x69, 0x70,
                                 0x71, 0x72, 0x73, 0x74, 0x75, 0x76];
         let first = unsafe { load8_unaligned(input.as_ptr()) };
         let second = unsafe { load8_unaligned(input.as_ptr().offset(8)) };
-        assert!(!is_basic_latin(first | second));
+        assert!(!simd_is_basic_latin(first | second)); // must be rejected: a lane is far above 0x7F
     }
 
     #[test]
-    fn test_is_basic_latin_ffff() {
+    fn test_simd_is_basic_latin_ffff() { // Renamed with the function under test; the input contains the maximum u16 value 0xFFFF.
         let input: [u16; 16] = [0x61, 0x62, 0x63, 0xFFFF, 0x65, 0x66, 0x67, 0x68, 0x69, 0x70,
                                 0x71, 0x72, 0x73, 0x74, 0x75, 0x76];
         let first = unsafe { load8_unaligned(input.as_ptr()) };
         let second = unsafe { load8_unaligned(input.as_ptr().offset(8)) };
-        assert!(!is_basic_latin(first | second));
+        assert!(!simd_is_basic_latin(first | second)); // guards against a signed comparison treating 0xFFFF as negative (hence "small")
     }
 
     #[test]
-    fn test_is_ascii_success() {
+    fn test_simd_is_ascii_success() { // Renamed with the function under test; all 16 input bytes are below 0x80.
         let ascii: [u8; 16] = [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x70, 0x71,
                                0x72, 0x73, 0x74, 0x75, 0x76];
         let simd = unsafe { load16_unaligned(ascii.as_ptr()) };
-        assert!(is_ascii(simd));
+        assert!(simd_is_ascii(simd)); // an all-ASCII vector must be accepted
     }
 
     #[test]
-    fn test_is_ascii_failure() {
+    fn test_simd_is_ascii_failure() { // Renamed with the function under test; the byte at index 4 is 0x81.
         let input: [u8; 16] = [0x61, 0x62, 0x63, 0x64, 0x81, 0x66, 0x67, 0x68, 0x69, 0x70, 0x71,
                                0x72, 0x73, 0x74, 0x75, 0x76];
         let simd = unsafe { load16_unaligned(input.as_ptr()) };
-        assert!(!is_ascii(simd));
+        assert!(!simd_is_ascii(simd)); // a single byte >= 0x80 must make the whole vector non-ASCII
     }
 
     #[cfg(target_feature = "sse2")]
diff --git a/third_party/rust/encoding_rs/src/testing.rs b/third_party/rust/encoding_rs/src/testing.rs
index eeeea00efa2e..724eb60579de 100644
--- a/third_party/rust/encoding_rs/src/testing.rs
+++ b/third_party/rust/encoding_rs/src/testing.rs
@@ -22,13 +22,20 @@ pub fn decode(encoding: &'static Encoding, bytes: &[u8], expect: &str) {
         }
         vec.extend_from_slice(bytes);
         string.push_str(expect);
-        decode_without_padding(encoding, &vec[..], &string[..]);
+        decode_without_padding_impl(encoding, &vec[..], &string[..], i);
     }
 }
 
 pub fn decode_without_padding(encoding: &'static Encoding, bytes: &[u8], expect: &str) {
-    decode_to_utf8(encoding, bytes, expect);
-    decode_to_utf16(encoding, bytes, &utf16_from_utf8(expect)[..]);
+    decode_without_padding_impl(encoding, bytes, expect, 0); // padding 0: the split-point loops in the *_impl helpers start at the very first byte
+}
+
+fn decode_without_padding_impl(encoding: &'static Encoding, // Runs the UTF-8, UTF-16 and String decode checks on one input.
+                               bytes: &[u8],
+                               expect: &str,
+                               padding: usize) { // first split offset to try; presumably the length of a prefix the caller prepended -- confirm in `decode`
+    decode_to_utf8_impl(encoding, bytes, expect, padding); // two-call streaming decode to UTF-8 at every split offset from `padding`
+    decode_to_utf16_impl(encoding, bytes, &utf16_from_utf8(expect)[..], padding); // same for UTF-16, with the expectation converted from `expect`
     decode_to_string(encoding, bytes, expect);
 }
 
@@ -56,40 +63,116 @@ pub fn encode_without_padding(encoding: &'static Encoding, string: &str, expect:
 }
 
 pub fn decode_to_utf16(encoding: &'static Encoding, bytes: &[u8], expect: &[u16]) {
+    decode_to_utf16_impl(encoding, bytes, expect, 0); // padding 0: try every split offset from the start of `bytes`
+}
+
+pub fn decode_to_utf16_impl(encoding: &'static Encoding, // Checks UTF-16 decoding with the input fed to the decoder in two pieces, once per split offset.
+                            bytes: &[u8],
+                            expect: &[u16],
+                            padding: usize) { // lowest split offset to try
+    for i in padding..bytes.len() { // NOTE(review): runs zero times when bytes.len() <= padding (e.g. empty input), and never tries i == bytes.len() (empty tail)
+        let (head, tail) = bytes.split_at(i); // head = bytes[..i], tail = bytes[i..]
+        decode_to_utf16_with_boundary(encoding, head, tail, expect); // the expected output is the same for every split
+    }
+}
+
+pub fn decode_to_utf16_with_boundary(encoding: &'static Encoding, // Decodes `head` then `tail` with one decoder and asserts the combined UTF-16 output equals `expect`.
+                                     head: &[u8], // first input piece, passed with `false` as the last argument
+                                     tail: &[u8], // second input piece, passed with `true` as the last argument
+                                     expect: &[u16]) { // expected output for head followed by tail
     let mut decoder = encoding.new_decoder();
-    let mut dest: Vec<u16> =
-        Vec::with_capacity(decoder.max_utf16_buffer_length(bytes.len()).unwrap());
+    let mut dest: Vec<u16> = Vec::with_capacity( // sized from the combined input length, so OutputFull is treated as impossible below
+        decoder
+            .max_utf16_buffer_length(head.len() + tail.len())
+            .unwrap() // panics if the decoder reports no buffer length (None)
+    );
     let capacity = dest.capacity();
     dest.resize(capacity, 0u16);
-    let (complete, read, written, _) = decoder.decode_to_utf16(bytes, &mut dest, true);
-    match complete {
-        CoderResult::InputEmpty => {}
-        CoderResult::OutputFull => {
-            unreachable!();
+    let mut total_read = 0; // input bytes consumed across both calls
+    let mut total_written = 0; // code units produced across both calls
+    { // first call: feed `head`
+        let (complete, read, written, _) = decoder.decode_to_utf16(head, &mut dest, false); // `false`: presumably signals that more input follows -- see the Decoder docs
+        match complete {
+            CoderResult::InputEmpty => {}
+            CoderResult::OutputFull => {
+                unreachable!(); // dest was sized for the whole input
+            }
         }
+        total_read += read;
+        total_written += written;
     }
-    assert_eq!(read, bytes.len());
-    assert_eq!(written, expect.len());
-    dest.truncate(written);
+    { // second call: feed `tail` and finish the stream
+        let (complete, read, written, _) =
+            decoder.decode_to_utf16(tail, &mut dest[total_written..], true); // continue writing right after the first call's output
+        match complete {
+            CoderResult::InputEmpty => {}
+            CoderResult::OutputFull => {
+                unreachable!(); // dest was sized for the whole input
+            }
+        }
+        total_read += read;
+        total_written += written;
+    }
+    assert_eq!(total_read, head.len() + tail.len()); // every input byte must have been consumed
+    assert_eq!(total_written, expect.len());
+    dest.truncate(total_written); // drop the unused zero-filled remainder before comparing
     assert_eq!(&dest[..], expect);
 }
 
 pub fn decode_to_utf8(encoding: &'static Encoding, bytes: &[u8], expect: &str) {
+    decode_to_utf8_impl(encoding, bytes, expect, 0); // padding 0: try every split offset from the start of `bytes`
+}
+
+pub fn decode_to_utf8_impl(encoding: &'static Encoding, // Checks UTF-8 decoding with the input fed to the decoder in two pieces, once per split offset.
+                           bytes: &[u8],
+                           expect: &str,
+                           padding: usize) { // lowest split offset to try
+    for i in padding..bytes.len() { // NOTE(review): runs zero times when bytes.len() <= padding (e.g. empty input), and never tries i == bytes.len() (empty tail)
+        let (head, tail) = bytes.split_at(i); // head = bytes[..i], tail = bytes[i..]
+        decode_to_utf8_with_boundary(encoding, head, tail, expect); // the expected output is the same for every split
+    }
+}
+
+pub fn decode_to_utf8_with_boundary(encoding: &'static Encoding, // Decodes `head` then `tail` with one decoder and asserts the combined UTF-8 output equals `expect`.
+                                    head: &[u8], // first input piece, passed with `false` as the last argument
+                                    tail: &[u8], // second input piece, passed with `true` as the last argument
+                                    expect: &str) { // expected output for head followed by tail
     let mut decoder = encoding.new_decoder();
-    let mut dest: Vec<u8> =
-        Vec::with_capacity(decoder.max_utf8_buffer_length(bytes.len()).unwrap());
+    let mut dest: Vec<u8> = Vec::with_capacity( // sized from the combined input length, so OutputFull is treated as impossible below
+        decoder
+            .max_utf8_buffer_length(head.len() + tail.len())
+            .unwrap() // panics if the decoder reports no buffer length (None)
+    );
     let capacity = dest.capacity();
     dest.resize(capacity, 0u8);
-    let (complete, read, written, _) = decoder.decode_to_utf8(bytes, &mut dest, true);
-    match complete {
-        CoderResult::InputEmpty => {}
-        CoderResult::OutputFull => {
-            unreachable!();
+    let mut total_read = 0; // input bytes consumed across both calls
+    let mut total_written = 0; // output bytes produced across both calls
+    { // first call: feed `head`
+        let (complete, read, written, _) = decoder.decode_to_utf8(head, &mut dest, false); // `false`: presumably signals that more input follows -- see the Decoder docs
+        match complete {
+            CoderResult::InputEmpty => {}
+            CoderResult::OutputFull => {
+                unreachable!(); // dest was sized for the whole input
+            }
         }
+        total_read += read;
+        total_written += written;
     }
-    assert_eq!(read, bytes.len());
-    assert_eq!(written, expect.len());
-    dest.truncate(written);
+    { // second call: feed `tail` and finish the stream
+        let (complete, read, written, _) =
+            decoder.decode_to_utf8(tail, &mut dest[total_written..], true); // continue writing right after the first call's output
+        match complete {
+            CoderResult::InputEmpty => {}
+            CoderResult::OutputFull => {
+                unreachable!(); // dest was sized for the whole input
+            }
+        }
+        total_read += read;
+        total_written += written;
+    }
+    assert_eq!(total_read, head.len() + tail.len()); // every input byte must have been consumed
+    assert_eq!(total_written, expect.len());
+    dest.truncate(total_written); // drop the unused zero-filled remainder before comparing
     assert_eq!(&dest[..], expect.as_bytes());
 }
 
diff --git a/third_party/rust/encoding_rs/src/utf_8.rs b/third_party/rust/encoding_rs/src/utf_8.rs
index a31587a314b8..9920c523f757 100644
--- a/third_party/rust/encoding_rs/src/utf_8.rs
+++ b/third_party/rust/encoding_rs/src/utf_8.rs
@@ -34,21 +34,21 @@ cfg_if! {
     }
 }
 
-const UTF8_NORMAL_TRAIL: u8 = 1 << 3;
+pub const UTF8_NORMAL_TRAIL: u8 = 1 << 3; // single-bit flag (bit 3); presumably tested against UTF8_TRAIL_INVALID entries -- confirm at the use sites
 
-const UTF8_THREE_BYTE_SPECIAL_LOWER_BOUND_TRAIL: u8 = 1 << 4;
+pub const UTF8_THREE_BYTE_SPECIAL_LOWER_BOUND_TRAIL: u8 = 1 << 4; // single-bit flag (bit 4); now visible outside this module
 
-const UTF8_THREE_BYTE_SPECIAL_UPPER_BOUND_TRAIL: u8 = 1 << 5;
+pub const UTF8_THREE_BYTE_SPECIAL_UPPER_BOUND_TRAIL: u8 = 1 << 5; // single-bit flag (bit 5); now visible outside this module
 
-const UTF8_FOUR_BYTE_SPECIAL_LOWER_BOUND_TRAIL: u8 = 1 << 6;
+pub const UTF8_FOUR_BYTE_SPECIAL_LOWER_BOUND_TRAIL: u8 = 1 << 6; // single-bit flag (bit 6); now visible outside this module
 
-const UTF8_FOUR_BYTE_SPECIAL_UPPER_BOUND_TRAIL: u8 = 1 << 7;
+pub const UTF8_FOUR_BYTE_SPECIAL_UPPER_BOUND_TRAIL: u8 = 1 << 7; // single-bit flag (bit 7); now visible outside this module
 
 // BEGIN GENERATED CODE. PLEASE DO NOT EDIT.
 // Instead, please regenerate using generate-encoding-data.py
 
 /// Bit is 1 if the trail is invalid.
-static UTF8_TRAIL_INVALID: [u8; 256] =
+pub static UTF8_TRAIL_INVALID: [u8; 256] =
     [248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248,
      248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248,
      248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248,
@@ -433,16 +433,18 @@ pub struct Utf8Decoder {
 }
 
 impl Utf8Decoder {
+    pub fn new_inner() -> Utf8Decoder { // Builds the bare decoder state without wrapping it in VariantDecoder.
+        Utf8Decoder {
+            code_point: 0, // no partially decoded code point
+            bytes_seen: 0, // no bytes of a multi-byte sequence consumed yet
+            bytes_needed: 0, // not in the middle of a sequence
+            lower_boundary: 0x80u8, // initial accepted trail-byte range is 0x80..=0xBF
+            upper_boundary: 0xBFu8,
+        }
+    }
+
     pub fn new() -> VariantDecoder {
-        VariantDecoder::Utf8(
-            Utf8Decoder {
-                code_point: 0,
-                bytes_seen: 0,
-                bytes_needed: 0,
-                lower_boundary: 0x80u8,
-                upper_boundary: 0xBFu8,
-            }
-        )
+        VariantDecoder::Utf8(Utf8Decoder::new_inner()) // same initial state as the removed struct literal, now built by new_inner
     }
 
     fn extra_from_state(&self) -> usize {
diff --git a/third_party/rust/simd/.cargo-checksum.json b/third_party/rust/simd/.cargo-checksum.json
index 65f31b0560e6..470289a0355c 100644
--- a/third_party/rust/simd/.cargo-checksum.json
+++ b/third_party/rust/simd/.cargo-checksum.json
@@ -1 +1 @@
-{"files":{".travis.yml":"e2c720c3633b7671efce49147c62b12bcbf630d7c5d6fc65cd97620bfa4ddcea","Cargo.toml":"608aad04f17a524ee21048fa2ce9f656ae344e0473dd0e331dc954f0f9677c63","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6d3a9431e65e69c73a8923e6517b889d17549b23db406b9ec027710d16af701f","README.md":"249294a9a5f63c64c0f7fe4a607060f43f3507dce2378067aa59d25fb3ae681d","benches/mandelbrot.rs":"051b5199e66bca6cf7774e9024915fd4e1349ab39726a10a14e06b60d65d87a4","benches/matrix.rs":"048a21dacdb62365e0105d00d2c8cd6bd2396ac81134f2bff7eb4f7d095fb735","examples/axpy.rs":"4307626045d64ec08361c97c9c72c5dc8d361bdc88f64453b97ac0212041a1b2","examples/convert.rs":"8e658fde050f8a0d8b84ad7570446b10fcf544afbd551b940ca340474f324840","examples/dot-product.rs":"6fe2e007c147af5353804173a593c5b9d57dbccec156e1da37e9e32537363f91","examples/fannkuch-redux-nosimd.rs":"7b2fbde35e8666929d14d67328471cb0483d038a5325232f8db148b30865312b","examples/fannkuch-redux.rs":"ea21fdbd2274488a62cc984acad6e0b65d52f24fb4ff63b7057a3a667e9c8aae","examples/mandelbrot.rs":"8b8fdca1edac50e5a33e0e0592bd41eb75114f31839ccd40d485c61a9a664380","examples/matrix-inverse.rs":"a378d20ef20c2119bb10a86de27c92fec2c2f77f374e6bfd36707c9825a5fe92","examples/nbody-nosimd.rs":"2c8e0a7feacd202fdd65eeceb6420d6e9f43340b81f20a8e532704a587a2796b","examples/nbody.rs":"a864311affab262024479d6348ff51af43d809e9ad332ec30ea4aacceaa2eae1","examples/ops.rs":"1316f915d0afcfa98fdc4077e965ccccf6b4b21c433cbe487ff0cdc60df3cd39","examples/spectral-norm-nosimd.rs":"ffc8512ecde779078ea467f38f423a0ea623c63da7078193f9dd370200773f79","examples/spectral-norm.rs":"edb09c9d477f83939098cfb77a27cc298bc7a0c8a8e29cece0cccae0d70d890e","src/aarch64/mod.rs":"83f52775364c98de0cecb7e1509530c18972e932469f5f1522aa24a735d0fa37","src/aarch64/neon.rs":"1fe769979e07d8e2bc3c78ce116e05d735860744efe097a894cc9421153257fb","src/arm/mod.rs":"dcdd90bc0b39abaf86a0c8946d442b16313563fbae1ff03248628275c74d8617","src/arm/neon.rs":"51cc5098
56200e80f8e4cc2c982586e6d1cef593ec4537e153dce0cfe31d3428","src/common.rs":"62f4e7e0fefb52ad190d0f2191bc435ac4deab3f2bc70dc427f2a7f9ccb7856e","src/lib.rs":"25f0b39c038fa85af858318135dfd87865be26c33bb4bd1438aec96a1e68d8b5","src/sixty_four.rs":"510a9e00189a61e4f0a5beb7052d5dee37fc8261f94a2af45ef10327e0f3b7df","src/v256.rs":"2e328e49034876d535e0627c7a62191da2b4fb156a657614bf531a5fc75b1385","src/x86/avx.rs":"c66140abefca634b48eae307c3ec8cf5a40f2279b10e246a7e2ac602a2a2bb28","src/x86/avx2.rs":"efe3006b13a13261a3dec3d37dc1d8cb53950f3803c420069231803374949937","src/x86/mod.rs":"0acc5a5e2672e2a0fddc11065663be8b8fa2da87320ea291fa86ff8c2f33edf5","src/x86/sse2.rs":"5ceda75a401958a135fc9d851b22075314cdeed69fd483b6a7be4f11373f40da","src/x86/sse3.rs":"9bd01a4f08069ca4f445952e744d651efe887e3835b18872e757375f0d053bd2","src/x86/sse4_1.rs":"9ceb80dd70a7e7dfeef508cb935e1a2637175bc87a3b090f5dea691ff6aa0516","src/x86/sse4_2.rs":"c59321aed8decdce4d0d8570cff46aed02e1a8265647ef7702e9b180fc581254","src/x86/ssse3.rs":"2290f0269bae316b8e0491495645ee38a9bd73525c8572759c1328341c3bdb4c"},"package":"7a94d14a2ae1f1f110937de5fb69e494372560181c7e1739a097fcc2cee37ba0"}
\ No newline at end of file
+{"files":{".travis.yml":"e2c720c3633b7671efce49147c62b12bcbf630d7c5d6fc65cd97620bfa4ddcea","Cargo.toml":"27c6a208f0c6253c4580508311d49bb421944abd272a7f9a5a38b51ef657aec2","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6d3a9431e65e69c73a8923e6517b889d17549b23db406b9ec027710d16af701f","README.md":"249294a9a5f63c64c0f7fe4a607060f43f3507dce2378067aa59d25fb3ae681d","benches/mandelbrot.rs":"051b5199e66bca6cf7774e9024915fd4e1349ab39726a10a14e06b60d65d87a4","benches/matrix.rs":"048a21dacdb62365e0105d00d2c8cd6bd2396ac81134f2bff7eb4f7d095fb735","examples/axpy.rs":"4307626045d64ec08361c97c9c72c5dc8d361bdc88f64453b97ac0212041a1b2","examples/convert.rs":"8e658fde050f8a0d8b84ad7570446b10fcf544afbd551b940ca340474f324840","examples/dot-product.rs":"6fe2e007c147af5353804173a593c5b9d57dbccec156e1da37e9e32537363f91","examples/fannkuch-redux-nosimd.rs":"7b2fbde35e8666929d14d67328471cb0483d038a5325232f8db148b30865312b","examples/fannkuch-redux.rs":"ea21fdbd2274488a62cc984acad6e0b65d52f24fb4ff63b7057a3a667e9c8aae","examples/mandelbrot.rs":"71be242543c1e487145d7f16341c05d05d86109de4d9e94c5d6bc9a9c6ed9766","examples/matrix-inverse.rs":"93dbc55c66a72e5f7bc730072f35682523fa20dd362755d8443ad6982143cb5d","examples/nbody-nosimd.rs":"9cf46ea02e266c20f811318f1c5856d5afb9575b2d48d552fbd978f5c1856bdb","examples/nbody.rs":"a864311affab262024479d6348ff51af43d809e9ad332ec30ea4aacceaa2eae1","examples/ops.rs":"b08ea83583df71d0052895d677320a9888da5b6729c9b70636d31ede5128bb7f","examples/spectral-norm-nosimd.rs":"ffc8512ecde779078ea467f38f423a0ea623c63da7078193f9dd370200773f79","examples/spectral-norm.rs":"edb09c9d477f83939098cfb77a27cc298bc7a0c8a8e29cece0cccae0d70d890e","src/aarch64/mod.rs":"83f52775364c98de0cecb7e1509530c18972e932469f5f1522aa24a735d0fa37","src/aarch64/neon.rs":"3c05ea43b7261b9af9c0d904b37de01c2ba99caedcb464700f16617b672965a1","src/arm/mod.rs":"dcdd90bc0b39abaf86a0c8946d442b16313563fbae1ff03248628275c74d8617","src/arm/neon.rs":"00aed2c9
4455b7ff5755b7598fb166a94c7242ad9adf4e5379560ab04af560e7","src/common.rs":"c5a7b937c5cd8c3bccf0fb20d5d77770c0d9b0dd9fa06a661c6f2ddf118e65c0","src/lib.rs":"08c345b6a2ad641daa3c1a40b1dcc6e4f9047939414bd81b05051fc74a563fec","src/sixty_four.rs":"d168776d02acf943bda8044b24e644b7a9584197a223eba1a7c3024b205dc87d","src/v256.rs":"34bfde3676e23f6925db5d0408ae838e3aab7706128fd7c33e855b8579c69318","src/x86/avx.rs":"efcf2120a904a89b0adf2d3d3bdd0ca17df2ec058410af23fb7e81915873f808","src/x86/avx2.rs":"3bcb3f391ad5f16f0a6da0bc1301329beb478ad6265bd3b2c9c124fc2e6198e5","src/x86/mod.rs":"0acc5a5e2672e2a0fddc11065663be8b8fa2da87320ea291fa86ff8c2f33edf5","src/x86/sse2.rs":"8807fb04bbfb404e17fcacf1e21d22616f8b377540a227b1fd03c121879122dd","src/x86/sse3.rs":"9bd01a4f08069ca4f445952e744d651efe887e3835b18872e757375f0d053bd2","src/x86/sse4_1.rs":"9ceb80dd70a7e7dfeef508cb935e1a2637175bc87a3b090f5dea691ff6aa0516","src/x86/sse4_2.rs":"c59321aed8decdce4d0d8570cff46aed02e1a8265647ef7702e9b180fc581254","src/x86/ssse3.rs":"2290f0269bae316b8e0491495645ee38a9bd73525c8572759c1328341c3bdb4c"},"package":"3dd0805c7363ab51a829a1511ad24b6ed0349feaa756c4bc2f977f9f496e6673"}
\ No newline at end of file
diff --git a/third_party/rust/simd/Cargo.toml b/third_party/rust/simd/Cargo.toml
index 769d509eb3ad..31e1f908cc49 100644
--- a/third_party/rust/simd/Cargo.toml
+++ b/third_party/rust/simd/Cargo.toml
@@ -1,25 +1,36 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g. crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
 [package]
 name = "simd"
-version = "0.2.0"
+version = "0.2.1"
 authors = ["Huon Wilson <dbau.pp+github@gmail.com>"]
-
-repository = "https://github.com/rust-lang-nursery/simd"
+description = "`simd` offers limited cross-platform access to SIMD instructions on\nCPUs, as well as raw interfaces to platform-specific instructions.\n"
 documentation = "https://rust-lang-nursery.github.io/simd/doc/simd"
-license = "MIT/Apache-2.0"
-keywords = ["simd", "data-parallel"]
 readme = "README.md"
+keywords = ["simd", "data-parallel"]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/rust-lang-nursery/simd"
+[package.metadata.docs.rs]
+features = ["doc"]
+[dependencies.serde]
+version = "1.0"
+optional = true
 
-description = """
-`simd` offers limited cross-platform access to SIMD instructions on
-CPUs, as well as raw interfaces to platform-specific instructions.
-"""
-
-[dependencies]
-serde = { version = "0.8", optional = true }
-serde_derive = { version = "0.8", optional = true }
-
-[dev-dependencies]
-cfg-if = "0.1"
+[dependencies.serde_derive]
+version = "1.0"
+optional = true
+[dev-dependencies.cfg-if]
+version = "0.1"
 
 [features]
 doc = []
diff --git a/third_party/rust/simd/examples/mandelbrot.rs b/third_party/rust/simd/examples/mandelbrot.rs
index 69a5214bbcb7..c6f1320a0784 100755
--- a/third_party/rust/simd/examples/mandelbrot.rs
+++ b/third_party/rust/simd/examples/mandelbrot.rs
@@ -1,4 +1,4 @@
-#![feature(step_by, test)]
+#![feature(iterator_step_by, test)]
 
 extern crate test;
 extern crate simd;
diff --git a/third_party/rust/simd/examples/matrix-inverse.rs b/third_party/rust/simd/examples/matrix-inverse.rs
index c366f3c02ddc..e6eb7ffc4655 100644
--- a/third_party/rust/simd/examples/matrix-inverse.rs
+++ b/third_party/rust/simd/examples/matrix-inverse.rs
@@ -25,6 +25,7 @@ fn mul(x: &[f32x4; 4], y: &[f32x4; 4]) -> [f32x4; 4] {
      ]
 }
 
+#[allow(dead_code)]
 fn inverse_naive(x: &[[f32; 4]; 4]) -> [[f32; 4]; 4] {
     let mut t = [[0_f32; 4]; 4];
     for i in 0..4 {
diff --git a/third_party/rust/simd/examples/nbody-nosimd.rs b/third_party/rust/simd/examples/nbody-nosimd.rs
index bafda399e754..d5f1bb422ff2 100644
--- a/third_party/rust/simd/examples/nbody-nosimd.rs
+++ b/third_party/rust/simd/examples/nbody-nosimd.rs
@@ -66,7 +66,7 @@ struct Planet {
 }
 
 fn advance(bodies: &mut [Planet;N_BODIES], dt: f64, steps: i32) {
-    for _ in (0..steps) {
+    for _ in 0..steps {
         let mut b_slice: &mut [_] = bodies;
         loop {
             let bi = match shift_mut_ref(&mut b_slice) {
diff --git a/third_party/rust/simd/examples/ops.rs b/third_party/rust/simd/examples/ops.rs
index 0e0ddcfe6d2f..f8c919101e3c 100644
--- a/third_party/rust/simd/examples/ops.rs
+++ b/third_party/rust/simd/examples/ops.rs
@@ -2,6 +2,7 @@ extern crate simd;
 
 use simd::*;
 
+#[allow(unused_variables)]
 fn main() {
     let x = i32x4::splat(1_i32);
     let y = -x;
diff --git a/third_party/rust/simd/src/aarch64/neon.rs b/third_party/rust/simd/src/aarch64/neon.rs
index 50db8e1aa7fd..0cca05a52788 100644
--- a/third_party/rust/simd/src/aarch64/neon.rs
+++ b/third_party/rust/simd/src/aarch64/neon.rs
@@ -630,7 +630,7 @@ impl Aarch64I8x16 for i8x16 {
 #[doc(hidden)]
 pub mod common {
     use super::super::super::*;
-    use std::mem;
+    use core::mem;
 
     #[inline]
     pub fn f32x4_sqrt(x: f32x4) -> f32x4 {
diff --git a/third_party/rust/simd/src/arm/neon.rs b/third_party/rust/simd/src/arm/neon.rs
index b77e1211270e..e29a84040123 100644
--- a/third_party/rust/simd/src/arm/neon.rs
+++ b/third_party/rust/simd/src/arm/neon.rs
@@ -473,7 +473,7 @@ impl u8x8 {
 pub mod common {
     use super::super::super::*;
     use super::*;
-    use std::mem;
+    use core::mem;
 
     #[inline]
     pub fn f32x4_sqrt(x: f32x4) -> f32x4 {
diff --git a/third_party/rust/simd/src/common.rs b/third_party/rust/simd/src/common.rs
index 8e36b2c3a069..1052ae36959d 100644
--- a/third_party/rust/simd/src/common.rs
+++ b/third_party/rust/simd/src/common.rs
@@ -9,8 +9,7 @@ use super::{
 
     Unalign, bitcast,
 };
-use std::mem;
-use std::ops;
+use core::{mem,ops};
 
 #[cfg(any(target_arch = "x86",
           target_arch = "x86_64"))]
diff --git a/third_party/rust/simd/src/lib.rs b/third_party/rust/simd/src/lib.rs
index 82bae9045066..a5398ebe1e3d 100644
--- a/third_party/rust/simd/src/lib.rs
+++ b/third_party/rust/simd/src/lib.rs
@@ -1,4 +1,5 @@
 //! `simd` offers a basic interface to the SIMD functionality of CPUs.
+#![no_std]
 
 #![feature(cfg_target_feature, repr_simd, platform_intrinsics, const_fn)]
 #![allow(non_camel_case_types)]
@@ -9,6 +10,8 @@ extern crate serde;
 #[macro_use]
 extern crate serde_derive;
 
+use core::mem;
+
 /// Boolean type for 8-bit integers.
 #[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
@@ -172,9 +175,9 @@ simd! {
 #[allow(dead_code)]
 #[inline]
 fn bitcast<T: Simd, U: Simd>(x: T) -> U {
-    assert_eq!(std::mem::size_of::<T>(),
-               std::mem::size_of::<U>());
-    unsafe {std::mem::transmute_copy(&x)}
+    assert_eq!(mem::size_of::<T>(),
+               mem::size_of::<U>());
+    unsafe {mem::transmute_copy(&x)}
 }
 
 #[allow(dead_code)]
@@ -207,9 +210,15 @@ extern "platform-intrinsic" {
     fn simd_xor<T: Simd>(x: T, y: T) -> T;
 }
 #[repr(packed)]
-#[derive(Debug, Copy, Clone)]
+#[derive(Copy)]
 struct Unalign<T>(T);
 
+impl<T: Clone> Clone for Unalign<T> {
+    fn clone(&self) -> Unalign<T> {
+        Unalign(unsafe { self.0.clone() })
+    }
+}
+
 #[macro_use]
 mod common;
 mod sixty_four;
diff --git a/third_party/rust/simd/src/sixty_four.rs b/third_party/rust/simd/src/sixty_four.rs
index 0d3fd4363105..a87f44a77ee7 100644
--- a/third_party/rust/simd/src/sixty_four.rs
+++ b/third_party/rust/simd/src/sixty_four.rs
@@ -11,8 +11,7 @@ use super::{
 
     Unalign, bitcast,
 };
-use std::mem;
-use std::ops;
+use core::{mem,ops};
 
 /// Boolean type for 64-bit integers.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
diff --git a/third_party/rust/simd/src/v256.rs b/third_party/rust/simd/src/v256.rs
index 88e486842358..519eb14e7259 100644
--- a/third_party/rust/simd/src/v256.rs
+++ b/third_party/rust/simd/src/v256.rs
@@ -1,6 +1,5 @@
 #![allow(dead_code)]
-use std::ops;
-use std::mem;
+use core::{mem,ops};
 #[allow(unused_imports)]
 use super::{
 	Simd,
@@ -329,6 +328,19 @@ impl i32x8 {
     }
 }
 
+impl f32x8 {
+    /// Convert each lane to a signed integer.
+    #[inline]
+    pub fn to_i32(self) -> i32x8 {
+        unsafe {simd_cast(self)}
+    }
+    /// Convert each lane to an unsigned integer.
+    #[inline]
+    pub fn to_u32(self) -> u32x8 {
+        unsafe {simd_cast(self)}
+    }
+}
+
 impl i16x16 {
     /// Convert each lane to an unsigned integer.
     #[inline]
diff --git a/third_party/rust/simd/src/x86/avx.rs b/third_party/rust/simd/src/x86/avx.rs
index 933fa41a612b..180247e36561 100644
--- a/third_party/rust/simd/src/x86/avx.rs
+++ b/third_party/rust/simd/src/x86/avx.rs
@@ -54,7 +54,7 @@ extern "platform-intrinsic" {
 #[doc(hidden)]
 pub mod common {
     use super::*;
-    use std::mem;
+    use core::mem;
 
     macro_rules! bools {
         ($($ty: ty, $all: ident, $any: ident, $testc: ident, $testz: ident;)*) => {
diff --git a/third_party/rust/simd/src/x86/avx2.rs b/third_party/rust/simd/src/x86/avx2.rs
index fa92e3b60786..e86a33d3b5bb 100644
--- a/third_party/rust/simd/src/x86/avx2.rs
+++ b/third_party/rust/simd/src/x86/avx2.rs
@@ -42,7 +42,7 @@ extern "platform-intrinsic" {
     fn x86_mm256_packus_epi32(x: i32x8, y: i32x8) -> u16x16;
     fn x86_mm256_permutevar8x32_epi32(x: i32x8, y: i32x8) -> i32x8;
     fn x86_mm256_permutevar8x32_ps(x: f32x8, y: i32x8) -> f32x8;
-    fn x86_mm256_sad_epu8(x: u8x32, y: u8x32) -> u8x32;
+    fn x86_mm256_sad_epu8(x: u8x32, y: u8x32) -> u64x4;
     fn x86_mm256_shuffle_epi8(x: i8x32, y: i8x32) -> i8x32;
     fn x86_mm256_sign_epi8(x: i8x32, y: i8x32) -> i8x32;
     fn x86_mm256_sign_epi16(x: i16x16, y: i16x16) -> i16x16;
diff --git a/third_party/rust/simd/src/x86/sse2.rs b/third_party/rust/simd/src/x86/sse2.rs
index 143254379edc..5cbc853694d5 100644
--- a/third_party/rust/simd/src/x86/sse2.rs
+++ b/third_party/rust/simd/src/x86/sse2.rs
@@ -48,7 +48,7 @@ extern "platform-intrinsic" {
 #[doc(hidden)]
 pub mod common {
     use super::super::super::*;
-    use std::mem;
+    use core::mem;
 
     #[inline]
     pub fn f32x4_sqrt(x: f32x4) -> f32x4 {
diff --git a/toolkit/library/gtest/rust/Cargo.lock b/toolkit/library/gtest/rust/Cargo.lock
index 10c8a3a728c2..fb83a93b90e6 100644
--- a/toolkit/library/gtest/rust/Cargo.lock
+++ b/toolkit/library/gtest/rust/Cargo.lock
@@ -420,25 +420,25 @@ name = "encoding_c"
 version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "encoding_glue"
 version = "0.1.0"
 dependencies = [
- "encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "nserror 0.1.0",
  "nsstring 0.1.0",
 ]
 
 [[package]]
 name = "encoding_rs"
-version = "0.7.1"
+version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "cfg-if 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "simd 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "simd 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -1208,7 +1208,7 @@ dependencies = [
 
 [[package]]
 name = "simd"
-version = "0.2.0"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
@@ -1659,7 +1659,7 @@ dependencies = [
 "checksum dwrote 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a207eb7b40e25d1d28dc679f451d321fb6954b73ceaa47986702575865469461"
 "checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a"
 "checksum encoding_c 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "93ec52324ca72f423237a413ca0e1c60654c8b3d0934fcd5fd888508dfcc4ba7"
-"checksum encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f5215aabf22b83153be3ee44dfe3f940214541b2ce13d419c55e7a115c8c51a9"
+"checksum encoding_rs 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "98fd0f24d1fb71a4a6b9330c8ca04cbd4e7cc5d846b54ca74ff376bc7c9f798d"
 "checksum env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3ddf21e73e016298f5cb37d6ef8e8da8e39f91f9ec8b0df44b7deb16a9f8cd5b"
 "checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3"
 "checksum euclid 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "926c639bfdff1f3063f76bb66245f6d2b691aa20fdbaabecc38b2947a13a4eba"
@@ -1734,7 +1734,7 @@ dependencies = [
 "checksum serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)" = "db99f3919e20faa51bb2996057f5031d8685019b5a06139b1ce761da671b8526"
 "checksum serde_derive 1.0.27 (git+https://github.com/gankro/serde?branch=deserialize_from_enums4)" = "<none>"
 "checksum serde_derive_internals 0.19.0 (git+https://github.com/gankro/serde?branch=deserialize_from_enums4)" = "<none>"
-"checksum simd 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a94d14a2ae1f1f110937de5fb69e494372560181c7e1739a097fcc2cee37ba0"
+"checksum simd 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3dd0805c7363ab51a829a1511ad24b6ed0349feaa756c4bc2f977f9f496e6673"
 "checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84"
 "checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23"
 "checksum smallbitvec 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "79b776f00dfe01df905fa3b2eaa1659522e99e3fc4a7b1334171622205c4bdcf"
diff --git a/toolkit/library/rust/Cargo.lock b/toolkit/library/rust/Cargo.lock
index 81f436e0bc52..eff11f8f86e3 100644
--- a/toolkit/library/rust/Cargo.lock
+++ b/toolkit/library/rust/Cargo.lock
@@ -420,25 +420,25 @@ name = "encoding_c"
 version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "encoding_glue"
 version = "0.1.0"
 dependencies = [
- "encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "nserror 0.1.0",
  "nsstring 0.1.0",
 ]
 
 [[package]]
 name = "encoding_rs"
-version = "0.7.1"
+version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "cfg-if 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "simd 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "simd 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -1196,7 +1196,7 @@ dependencies = [
 
 [[package]]
 name = "simd"
-version = "0.2.0"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
@@ -1671,7 +1671,7 @@ dependencies = [
 "checksum dwrote 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a207eb7b40e25d1d28dc679f451d321fb6954b73ceaa47986702575865469461"
 "checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a"
 "checksum encoding_c 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "93ec52324ca72f423237a413ca0e1c60654c8b3d0934fcd5fd888508dfcc4ba7"
-"checksum encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f5215aabf22b83153be3ee44dfe3f940214541b2ce13d419c55e7a115c8c51a9"
+"checksum encoding_rs 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "98fd0f24d1fb71a4a6b9330c8ca04cbd4e7cc5d846b54ca74ff376bc7c9f798d"
 "checksum env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3ddf21e73e016298f5cb37d6ef8e8da8e39f91f9ec8b0df44b7deb16a9f8cd5b"
 "checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3"
 "checksum euclid 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "926c639bfdff1f3063f76bb66245f6d2b691aa20fdbaabecc38b2947a13a4eba"
@@ -1746,7 +1746,7 @@ dependencies = [
 "checksum serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)" = "db99f3919e20faa51bb2996057f5031d8685019b5a06139b1ce761da671b8526"
 "checksum serde_derive 1.0.27 (git+https://github.com/gankro/serde?branch=deserialize_from_enums4)" = "<none>"
 "checksum serde_derive_internals 0.19.0 (git+https://github.com/gankro/serde?branch=deserialize_from_enums4)" = "<none>"
-"checksum simd 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a94d14a2ae1f1f110937de5fb69e494372560181c7e1739a097fcc2cee37ba0"
+"checksum simd 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3dd0805c7363ab51a829a1511ad24b6ed0349feaa756c4bc2f977f9f496e6673"
 "checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84"
 "checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23"
 "checksum smallbitvec 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "79b776f00dfe01df905fa3b2eaa1659522e99e3fc4a7b1334171622205c4bdcf"