зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1482095 - Update encoding_rs to 0.8.6. r=emk.
MozReview-Commit-ID: IqPrrQ7L1lU
This commit is contained in:
Родитель
726074f95d
Коммит
c82d099240
|
@ -634,21 +634,21 @@ name = "encoding_c"
|
|||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_glue"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"nserror 0.1.0",
|
||||
"nsstring 0.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.4"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1458,7 +1458,7 @@ name = "nsstring"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2684,7 +2684,7 @@ dependencies = [
|
|||
"checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a"
|
||||
"checksum ena 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cabe5a5078ac8c506d3e4430763b1ba9b609b1286913e7d08e581d1c2de9b7e5"
|
||||
"checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee"
|
||||
"checksum encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "88a1b66a0d28af4b03a8c8278c6dcb90e6e600d89c14500a9e7a02e64b9ee3ac"
|
||||
"checksum encoding_rs 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2a91912d6f37c6a8fef8a2316a862542d036f13c923ad518b5aca7bcaac7544c"
|
||||
"checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad"
|
||||
"checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3"
|
||||
"checksum euclid 0.19.0 (registry+https://github.com/rust-lang/crates.io-index)" = "70a2ebdf55fb9d6329046e026329a55ef8fbaae5ea833f56e170beb3125a8a5f"
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -12,7 +12,7 @@
|
|||
|
||||
[package]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.4"
|
||||
version = "0.8.6"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "A Gecko-oriented implementation of the Encoding Standard"
|
||||
homepage = "https://docs.rs/encoding_rs/"
|
||||
|
|
|
@ -68,6 +68,13 @@ Additionally, `encoding_rs::mem` does the following:
|
|||
* Converts ASCII to UTF-16 up to the first non-ASCII byte.
|
||||
* Converts UTF-16 to ASCII up to the first non-Basic Latin code unit.
|
||||
|
||||
## Integration with `std::io`
|
||||
|
||||
Notably, the above feature list doesn't include the capability to wrap
|
||||
a `std::io::Read`, decode it into UTF-8 and present the result via
|
||||
`std::io::Read`. The [`encoding_rs_io`](https://crates.io/crates/encoding_rs_io)
|
||||
crate provides that capability.
|
||||
|
||||
## Licensing
|
||||
|
||||
Please see the file named
|
||||
|
@ -237,6 +244,22 @@ used in Firefox.
|
|||
|
||||
## Release Notes
|
||||
|
||||
### 0.8.6
|
||||
|
||||
* Temporarily removed the debug assertion added in version 0.8.5 from
|
||||
`convert_utf16_to_latin1_lossy`.
|
||||
|
||||
### 0.8.5
|
||||
|
||||
* If debug assertions are enabled but fuzzing isn't enabled, lossy conversions
|
||||
to Latin1 in the `mem` module assert that the input is in the range
|
||||
U+0000...U+00FF (inclusive).
|
||||
* In the `mem` module provide conversions from Latin1 and UTF-16 to UTF-8
|
||||
that can deal with insufficient output space. The idea is to use them
|
||||
first with an allocation rounded up to jemalloc bucket size and do the
|
||||
worst-case allocation only if the jemalloc rounding up was insufficient
|
||||
as the first guess.
|
||||
|
||||
### 0.8.4
|
||||
|
||||
* Fix SSE2-specific, `simd-accel`-specific memory corruption introduced in
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
// except according to those terms.
|
||||
|
||||
#![cfg_attr(feature = "cargo-clippy", allow(doc_markdown, inline_always, new_ret_no_self))]
|
||||
#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.4")]
|
||||
#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.6")]
|
||||
|
||||
//! encoding_rs is a Gecko-oriented Free Software / Open Source implementation
|
||||
//! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust.
|
||||
|
@ -36,6 +36,13 @@
|
|||
//! The [repository is on GitHub](https://github.com/hsivonen/encoding_rs). The
|
||||
//! [crate is available on crates.io](https://crates.io/crates/encoding_rs).
|
||||
//!
|
||||
//! # Integration with `std::io`
|
||||
//!
|
||||
//! This crate doesn't implement traits from `std::io`. However, the case of
|
||||
//! wrapping a `std::io::Read` in a decoder that implements `std::io::Read` and
|
||||
//! presents the data from the wrapped `std::io::Read` as UTF-8 is addressed by
|
||||
//! the [`encoding_rs_io`](https://docs.rs/encoding_rs_io/) crate.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! Example programs:
|
||||
|
|
|
@ -31,6 +31,10 @@ use super::EncoderResult;
|
|||
use ascii::*;
|
||||
use utf_8::*;
|
||||
|
||||
macro_rules! non_fuzz_debug_assert {
|
||||
($($arg:tt)*) => (if !cfg!(fuzzing) { debug_assert!($($arg)*); })
|
||||
}
|
||||
|
||||
cfg_if!{
|
||||
if #[cfg(feature = "simd-accel")] {
|
||||
use ::std::intrinsics::unlikely;
|
||||
|
@ -1547,6 +1551,33 @@ pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
|
|||
}
|
||||
}
|
||||
|
||||
/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
|
||||
/// with the REPLACEMENT CHARACTER with potentially insufficient output
|
||||
/// space.
|
||||
///
|
||||
/// Returns the number of code units read and the number of bytes written.
|
||||
///
|
||||
/// Not all code units are read if there isn't enough output space.
|
||||
///
|
||||
/// Note that this method isn't designed for general streamability but for
|
||||
/// not allocating memory for the worst case up front. Specifically,
|
||||
/// if the input starts with or ends with an unpaired surrogate, those are
|
||||
/// replaced with the REPLACEMENT CHARACTER.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Note that this function may write garbage beyond the number of bytes
|
||||
/// indicated by the return value, so using a `&mut str` interpreted as
|
||||
/// `&mut [u8]` as the destination is not safe. If you want to convert into
|
||||
/// a `&mut str`, use `convert_utf16_to_str_partial()` instead of this function.
|
||||
#[inline]
|
||||
pub fn convert_utf16_to_utf8_partial(src: &[u16], dst: &mut [u8]) -> (usize, usize) {
|
||||
let mut encoder = Utf8Encoder;
|
||||
let (result, read, written) = encoder.encode_from_utf16_raw(src, dst, true);
|
||||
debug_assert!(result == EncoderResult::OutputFull || read == src.len());
|
||||
(read, written)
|
||||
}
|
||||
|
||||
/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
|
||||
/// with the REPLACEMENT CHARACTER.
|
||||
///
|
||||
|
@ -1568,12 +1599,42 @@ pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
|
|||
#[inline]
|
||||
pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize {
|
||||
assert!(dst.len() >= src.len() * 3 + 1);
|
||||
let mut encoder = Utf8Encoder;
|
||||
let (result, _, written) = encoder.encode_from_utf16_raw(src, dst, true);
|
||||
debug_assert!(result == EncoderResult::InputEmpty);
|
||||
let (read, written) = convert_utf16_to_utf8_partial(src, dst);
|
||||
debug_assert_eq!(read, src.len());
|
||||
written
|
||||
}
|
||||
|
||||
/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
|
||||
/// with the REPLACEMENT CHARACTER such that the validity of the output is
|
||||
/// signaled using the Rust type system with potentially insufficient output
|
||||
/// space.
|
||||
///
|
||||
/// Returns the number of code units read and the number of bytes written.
|
||||
///
|
||||
/// Not all code units are read if there isn't enough output space.
|
||||
///
|
||||
/// Note that this method isn't designed for general streamability but for
|
||||
/// not allocating memory for the worst case up front. Specifically,
|
||||
/// if the input starts with or ends with an unpaired surrogate, those are
|
||||
/// replaced with the REPLACEMENT CHARACTER.
|
||||
#[inline]
|
||||
pub fn convert_utf16_to_str_partial(src: &[u16], dst: &mut str) -> (usize, usize) {
|
||||
let bytes: &mut [u8] = unsafe { ::std::mem::transmute(dst) };
|
||||
let (read, written) = convert_utf16_to_utf8_partial(src, bytes);
|
||||
let len = bytes.len();
|
||||
let mut trail = written;
|
||||
let max = ::std::cmp::min(len, trail + MAX_STRIDE_SIZE);
|
||||
while trail < max {
|
||||
bytes[trail] = 0;
|
||||
trail += 1;
|
||||
}
|
||||
while trail < len && ((bytes[trail] & 0xC0) == 0x80) {
|
||||
bytes[trail] = 0;
|
||||
trail += 1;
|
||||
}
|
||||
(read, written)
|
||||
}
|
||||
|
||||
/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
|
||||
/// with the REPLACEMENT CHARACTER such that the validity of the output is
|
||||
/// signaled using the Rust type system.
|
||||
|
@ -1588,19 +1649,9 @@ pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize {
|
|||
/// Panics if the destination buffer is shorter than stated above.
|
||||
#[inline]
|
||||
pub fn convert_utf16_to_str(src: &[u16], dst: &mut str) -> usize {
|
||||
let bytes: &mut [u8] = unsafe { ::std::mem::transmute(dst) };
|
||||
let written = convert_utf16_to_utf8(src, bytes);
|
||||
let len = bytes.len();
|
||||
let mut trail = written;
|
||||
let max = ::std::cmp::min(len, trail + MAX_STRIDE_SIZE);
|
||||
while trail < max {
|
||||
bytes[trail] = 0;
|
||||
trail += 1;
|
||||
}
|
||||
while trail < len && ((bytes[trail] & 0xC0) == 0x80) {
|
||||
bytes[trail] = 0;
|
||||
trail += 1;
|
||||
}
|
||||
assert!(dst.len() >= src.len() * 3 + 1);
|
||||
let (read, written) = convert_utf16_to_str_partial(src, dst);
|
||||
debug_assert_eq!(read, src.len());
|
||||
written
|
||||
}
|
||||
|
||||
|
@ -1629,6 +1680,59 @@ pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Converts bytes whose unsigned value is interpreted as Unicode code point
|
||||
/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8 with potentially insufficient
|
||||
/// output space.
|
||||
///
|
||||
/// Returns the number of bytes read and the number of bytes written.
|
||||
///
|
||||
/// If the output isn't large enough, not all input is consumed.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Note that this function may write garbage beyond the number of bytes
|
||||
/// indicated by the return value, so using a `&mut str` interpreted as
|
||||
/// `&mut [u8]` as the destination is not safe. If you want to convert into
|
||||
/// a `&mut str`, use `convert_latin1_to_str_partial()` instead of this function.
|
||||
#[inline]
|
||||
pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usize) {
|
||||
let src_len = src.len();
|
||||
let src_ptr = src.as_ptr();
|
||||
let dst_ptr = dst.as_mut_ptr();
|
||||
let dst_len = dst.len();
|
||||
let mut total_read = 0usize;
|
||||
let mut total_written = 0usize;
|
||||
loop {
|
||||
// src can't advance more than dst
|
||||
let src_left = src_len - total_read;
|
||||
let dst_left = dst_len - total_written;
|
||||
let min_left = ::std::cmp::min(src_left, dst_left);
|
||||
if let Some((non_ascii, consumed)) = unsafe {
|
||||
ascii_to_ascii(
|
||||
src_ptr.offset(total_read as isize),
|
||||
dst_ptr.offset(total_written as isize),
|
||||
min_left,
|
||||
)
|
||||
} {
|
||||
total_read += consumed;
|
||||
total_written += consumed;
|
||||
if total_written.checked_add(2).unwrap() > dst_len {
|
||||
return (total_read, total_written);
|
||||
}
|
||||
|
||||
total_read += 1; // consume `non_ascii`
|
||||
|
||||
let code_point = non_ascii as u32;
|
||||
dst[total_written] = ((code_point >> 6) | 0xC0u32) as u8;
|
||||
total_written += 1;
|
||||
dst[total_written] = ((code_point as u32 & 0x3Fu32) | 0x80u32) as u8;
|
||||
total_written += 1;
|
||||
continue;
|
||||
}
|
||||
return (total_read + min_left, total_written + min_left);
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts bytes whose unsigned value is interpreted as Unicode code point
|
||||
/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8.
|
||||
///
|
||||
|
@ -1653,33 +1757,35 @@ pub fn convert_latin1_to_utf8(src: &[u8], dst: &mut [u8]) -> usize {
|
|||
dst.len() >= src.len() * 2,
|
||||
"Destination must not be shorter than the source times two."
|
||||
);
|
||||
let src_len = src.len();
|
||||
let src_ptr = src.as_ptr();
|
||||
let dst_ptr = dst.as_mut_ptr();
|
||||
let mut total_read = 0usize;
|
||||
let mut total_written = 0usize;
|
||||
loop {
|
||||
// src can't advance more than dst
|
||||
let src_left = src_len - total_read;
|
||||
if let Some((non_ascii, consumed)) = unsafe {
|
||||
ascii_to_ascii(
|
||||
src_ptr.offset(total_read as isize),
|
||||
dst_ptr.offset(total_written as isize),
|
||||
src_left,
|
||||
)
|
||||
} {
|
||||
total_read += consumed + 1;
|
||||
total_written += consumed;
|
||||
let (read, written) = convert_latin1_to_utf8_partial(src, dst);
|
||||
debug_assert_eq!(read, src.len());
|
||||
written
|
||||
}
|
||||
|
||||
let code_point = non_ascii as u32;
|
||||
dst[total_written] = ((code_point >> 6) | 0xC0u32) as u8;
|
||||
total_written += 1;
|
||||
dst[total_written] = ((code_point as u32 & 0x3Fu32) | 0x80u32) as u8;
|
||||
total_written += 1;
|
||||
continue;
|
||||
}
|
||||
return total_written + src_left;
|
||||
/// Converts bytes whose unsigned value is interpreted as Unicode code point
|
||||
/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8 such that the validity of the
|
||||
/// output is signaled using the Rust type system with potentially insufficient
|
||||
/// output space.
|
||||
///
|
||||
/// Returns the number of bytes read and the number of bytes written.
|
||||
///
|
||||
/// If the output isn't large enough, not all input is consumed.
|
||||
#[inline]
|
||||
pub fn convert_latin1_to_str_partial(src: &[u8], dst: &mut str) -> (usize, usize) {
|
||||
let bytes: &mut [u8] = unsafe { ::std::mem::transmute(dst) };
|
||||
let (read, written) = convert_latin1_to_utf8_partial(src, bytes);
|
||||
let len = bytes.len();
|
||||
let mut trail = written;
|
||||
let max = ::std::cmp::min(len, trail + MAX_STRIDE_SIZE);
|
||||
while trail < max {
|
||||
bytes[trail] = 0;
|
||||
trail += 1;
|
||||
}
|
||||
while trail < len && ((bytes[trail] & 0xC0) == 0x80) {
|
||||
bytes[trail] = 0;
|
||||
trail += 1;
|
||||
}
|
||||
(read, written)
|
||||
}
|
||||
|
||||
/// Converts bytes whose unsigned value is interpreted as Unicode code point
|
||||
|
@ -1696,19 +1802,12 @@ pub fn convert_latin1_to_utf8(src: &[u8], dst: &mut [u8]) -> usize {
|
|||
/// Panics if the destination buffer is shorter than stated above.
|
||||
#[inline]
|
||||
pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize {
|
||||
let bytes: &mut [u8] = unsafe { ::std::mem::transmute(dst) };
|
||||
let written = convert_latin1_to_utf8(src, bytes);
|
||||
let len = bytes.len();
|
||||
let mut trail = written;
|
||||
let max = ::std::cmp::min(len, trail + MAX_STRIDE_SIZE);
|
||||
while trail < max {
|
||||
bytes[trail] = 0;
|
||||
trail += 1;
|
||||
}
|
||||
while trail < len && ((bytes[trail] & 0xC0) == 0x80) {
|
||||
bytes[trail] = 0;
|
||||
trail += 1;
|
||||
}
|
||||
assert!(
|
||||
dst.len() >= src.len() * 2,
|
||||
"Destination must not be shorter than the source times two."
|
||||
);
|
||||
let (read, written) = convert_latin1_to_str_partial(src, dst);
|
||||
debug_assert_eq!(read, src.len());
|
||||
written
|
||||
}
|
||||
|
||||
|
@ -1718,6 +1817,7 @@ pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize {
|
|||
/// each output byte.
|
||||
///
|
||||
/// If the input does not fulfill the condition stated above, this function
|
||||
/// panics if debug assertions are enabled (and fuzzing isn't) and otherwise
|
||||
/// does something that is memory-safe without any promises about any
|
||||
/// properties of the output. In particular, callers shouldn't assume the
|
||||
/// output to be the same across crate versions or CPU architectures and
|
||||
|
@ -1731,12 +1831,16 @@ pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize {
|
|||
/// # Panics
|
||||
///
|
||||
/// Panics if the destination buffer is shorter than stated above.
|
||||
///
|
||||
/// Also panics if debug assertions are enabled (and not fuzzing) and the input is
|
||||
/// not in the range U+0000 to U+00FF, inclusive.
|
||||
#[inline]
|
||||
pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
|
||||
assert!(
|
||||
dst.len() >= src.len(),
|
||||
"Destination must not be shorter than the source."
|
||||
);
|
||||
non_fuzz_debug_assert!(is_utf8_latin1(src));
|
||||
let src_len = src.len();
|
||||
let src_ptr = src.as_ptr();
|
||||
let dst_ptr = dst.as_mut_ptr();
|
||||
|
@ -1776,11 +1880,12 @@ pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
|
|||
/// represents the value of each code point as the unsigned byte value of
|
||||
/// each output byte.
|
||||
///
|
||||
/// If the input does not fulfill the condition stated above, this function
|
||||
/// does something that is memory-safe without any promises about any
|
||||
/// properties of the output. In particular, callers shouldn't assume the
|
||||
/// output to be the same across crate versions or CPU architectures and
|
||||
/// should not assume that non-Basic Latin input can't map to ASCII output.
|
||||
/// If the input does not fulfill the condition stated above, this function does something
|
||||
/// that is memory-safe without any promises about any properties of the
|
||||
/// output and will probably assert in debug builds in future versions.
|
||||
/// In particular, callers shouldn't assume the output to be the same across
|
||||
/// crate versions or CPU architectures and should not assume that non-ASCII
|
||||
/// input can't map to ASCII output.
|
||||
///
|
||||
/// The length of the destination buffer must be at least the length of the
|
||||
/// source buffer.
|
||||
|
@ -1790,12 +1895,16 @@ pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
|
|||
/// # Panics
|
||||
///
|
||||
/// Panics if the destination buffer is shorter than stated above.
|
||||
///
|
||||
/// (Probably in future versions if debug assertions are enabled (and not
|
||||
/// fuzzing) and the input is not in the range U+0000 to U+00FF, inclusive.)
|
||||
#[inline]
|
||||
pub fn convert_utf16_to_latin1_lossy(src: &[u16], dst: &mut [u8]) {
|
||||
assert!(
|
||||
dst.len() >= src.len(),
|
||||
"Destination must not be shorter than the source."
|
||||
);
|
||||
// non_fuzz_debug_assert!(is_utf16_latin1(src));
|
||||
unsafe {
|
||||
pack_latin1(src.as_ptr(), dst.as_mut_ptr(), src.len());
|
||||
}
|
||||
|
@ -2100,6 +2209,18 @@ mod tests {
|
|||
assert_eq!(dst, reference);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_utf16_to_utf8_partial() {
|
||||
let reference = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
|
||||
let src: Vec<u16> = reference.encode_utf16().collect();
|
||||
let mut dst: Vec<u8> = Vec::with_capacity(src.len() * 3 + 1);
|
||||
dst.resize(src.len() * 3 + 1, 0);
|
||||
let (read, written) = convert_utf16_to_utf8_partial(&src[..], &mut dst[..24]);
|
||||
let len = written + convert_utf16_to_utf8(&src[read..], &mut dst[written..]);
|
||||
dst.truncate(len);
|
||||
assert_eq!(dst, reference.as_bytes());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_utf16_to_utf8() {
|
||||
let reference = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
|
||||
|
@ -2127,6 +2248,14 @@ mod tests {
|
|||
assert_eq!(dst, reference);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_latin1_to_utf8_partial() {
|
||||
let mut dst = [0u8, 2];
|
||||
let (read, written) = convert_latin1_to_utf8_partial(b"a\xFF", &mut dst[..]);
|
||||
assert_eq!(read, 1);
|
||||
assert_eq!(written, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_latin1_to_utf8() {
|
||||
let mut src: Vec<u8> = Vec::with_capacity(256);
|
||||
|
@ -2163,6 +2292,13 @@ mod tests {
|
|||
assert_eq!(dst, reference);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_convert_utf8_to_latin1_lossy_panics() {
|
||||
let mut dst = [0u8; 16];
|
||||
let _ = convert_utf8_to_latin1_lossy("\u{100}".as_bytes(), &mut dst[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_utf16_to_latin1_lossy() {
|
||||
let mut src: Vec<u16> = Vec::with_capacity(256);
|
||||
|
@ -2179,6 +2315,13 @@ mod tests {
|
|||
assert_eq!(dst, reference);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// #[should_panic]
|
||||
fn test_convert_utf16_to_latin1_lossy_panics() {
|
||||
let mut dst = [0u8; 16];
|
||||
let _ = convert_utf16_to_latin1_lossy(&[0x0100u16], &mut dst[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_utf16_valid_up_to() {
|
||||
let valid = vec![
|
||||
|
|
Загрузка…
Ссылка в новой задаче