зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1495067 - Make HasRTLChars() consider Hebrew presentation forms as RTL (again) and not consider U+FEFF as RTL (again). r=jfkthame
* Update encoding_rs to 0.8.8. * Change U+FEFD and U+FEFE to RTL in IS_RTL_PRESENTATION_FORM to make the Rust and C++ code agree on what's RTL. MozReview-Commit-ID: CuK6fN4pojG Differential Revision: https://phabricator.services.mozilla.com/D7285 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Родитель
e198ffe488
Коммит
b8545f2cf8
|
@ -726,21 +726,21 @@ name = "encoding_c"
|
|||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_glue"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"nserror 0.1.0",
|
||||
"nsstring 0.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.7"
|
||||
version = "0.8.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1604,7 +1604,7 @@ name = "nsstring"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3069,7 +3069,7 @@ dependencies = [
|
|||
"checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a"
|
||||
"checksum ena 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cabe5a5078ac8c506d3e4430763b1ba9b609b1286913e7d08e581d1c2de9b7e5"
|
||||
"checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee"
|
||||
"checksum encoding_rs 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)" = "21a550ec129ca2f8593227888625c7c5331c6ad878e2cee6b7ac25e1c7d05746"
|
||||
"checksum encoding_rs 0.8.8 (registry+https://github.com/rust-lang/crates.io-index)" = "cc9945e460ad969220c1061b9574fb02ed097c6f0704ce2f3e336cb443c40c73"
|
||||
"checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad"
|
||||
"checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3"
|
||||
"checksum euclid 0.19.0 (registry+https://github.com/rust-lang/crates.io-index)" = "70a2ebdf55fb9d6329046e026329a55ef8fbaae5ea833f56e170beb3125a8a5f"
|
||||
|
|
|
@ -265,7 +265,7 @@ typedef enum nsCharType nsCharType;
|
|||
|
||||
#define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff))
|
||||
#define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \
|
||||
((0xfe70 <= (c)) && ((c) <= 0xfefc)))
|
||||
((0xfe70 <= (c)) && ((c) <= 0xfefe)))
|
||||
#define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
|
||||
((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
|
||||
// Due to the supplementary-plane RTL blocks being identifiable from the
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
[floating-first-letter-05d0.html]
|
||||
type: reftest
|
||||
disabled: https://bugzilla.mozilla.org/show_bug.cgi?id=1495674
|
||||
expected:
|
||||
FAIL
|
|
@ -0,0 +1,20 @@
|
|||
<!doctype html>
|
||||
<meta charset=utf-8>
|
||||
<title>Drop cap with U+05D0 in the document</title>
|
||||
<meta name="assert" content="The text placement within :first-line should not be affected by later presence of a right-to-left character.">
|
||||
<link rel=help href=https://drafts.csswg.org/css-pseudo-4/#first-line-styling>
|
||||
<link rel=match href=/css/selectors/floating-first-letter-ref.html>
|
||||
<style>
|
||||
p:first-line {
|
||||
background: lightblue;
|
||||
}
|
||||
|
||||
p::first-letter {
|
||||
float: left;
|
||||
font-size: 4rem;
|
||||
}
|
||||
div {
|
||||
color: transparent;
|
||||
}
|
||||
</style>
|
||||
<p>Ab</p><div>א</div>
|
|
@ -0,0 +1,17 @@
|
|||
<!doctype html>
|
||||
<meta charset=utf-8>
|
||||
<title>Drop cap with U+FEFF in the document</title>
|
||||
<meta name="assert" content="The text placement within :first-line should not be affected by later presence of U+FEFF.">
|
||||
<link rel=help href=https://drafts.csswg.org/css-pseudo-4/#first-line-styling>
|
||||
<link rel=match href=/css/selectors/floating-first-letter-ref.html>
|
||||
<style>
|
||||
p:first-line {
|
||||
background: lightblue;
|
||||
}
|
||||
|
||||
p::first-letter {
|
||||
float: left;
|
||||
font-size: 4rem;
|
||||
}
|
||||
</style>
|
||||
<p>Ab</p>
|
|
@ -0,0 +1,14 @@
|
|||
<!doctype html>
|
||||
<meta charset=utf-8>
|
||||
<title>Drop cap with no bidi in the document</title>
|
||||
<style>
|
||||
p:first-line {
|
||||
background: lightblue;
|
||||
}
|
||||
|
||||
p::first-letter {
|
||||
float: left;
|
||||
font-size: 4rem;
|
||||
}
|
||||
</style>
|
||||
<p>Ab</p>
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"git": {
|
||||
"sha1": "b67c60025bfebbf186e8b22f03edc9b6dc96df59"
|
||||
"sha1": "ffe863483bcfeb069edf645738ac3650899d2801"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
[package]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.7"
|
||||
version = "0.8.8"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "A Gecko-oriented implementation of the Encoding Standard"
|
||||
homepage = "https://docs.rs/encoding_rs/"
|
||||
|
|
|
@ -244,6 +244,14 @@ used in Firefox.
|
|||
|
||||
## Release Notes
|
||||
|
||||
### 0.8.8
|
||||
|
||||
* Made the `is_foo_bidi()` functions not treat U+FEFF (ZERO WIDTH NO-BREAK SPACE
|
||||
aka. BYTE ORDER MARK) as right-to-left.
|
||||
* Made the `is_foo_bidi()` functions report `true` if the input contains
|
||||
Hebrew presentation forms (which are right-to-left but not in a
|
||||
right-to-left-roadmapped block).
|
||||
|
||||
### 0.8.7
|
||||
|
||||
* Fixed a panic in the UTF-16LE/UTF-16BE decoder when decoding to UTF-8.
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
// except according to those terms.
|
||||
|
||||
#![cfg_attr(feature = "cargo-clippy", allow(doc_markdown, inline_always, new_ret_no_self))]
|
||||
#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.7")]
|
||||
#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.8")]
|
||||
|
||||
//! encoding_rs is a Gecko-oriented Free Software / Open Source implementation
|
||||
//! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust.
|
||||
|
|
|
@ -669,11 +669,14 @@ pub fn is_utf16_latin1(buffer: &[u16]) -> bool {
|
|||
/// that trigger right-to-left processing.
|
||||
///
|
||||
/// The check is done on a Unicode block basis without regard to assigned
|
||||
/// vs. unassigned code points in the block. Additionally, the four
|
||||
/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
|
||||
/// Control characters that are technically bidi controls but do not cause
|
||||
/// right-to-left behavior without the presence of right-to-left characters
|
||||
/// or right-to-left controls are not checked for.
|
||||
/// vs. unassigned code points in the block. Hebrew presentation forms in
|
||||
/// the Alphabetic Presentation Forms block are treated as if they formed
|
||||
/// a block on their own (i.e. they are treated as right-to-left). Additionally,
|
||||
/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
|
||||
/// for. Control characters that are technically bidi controls but do not
|
||||
/// cause right-to-left behavior without the presence of right-to-left
|
||||
/// characters or right-to-left controls are not checked for. As a special
|
||||
/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
|
||||
///
|
||||
/// Returns `true` if the input is invalid UTF-8 or the input contains an
|
||||
/// RTL character. Returns `false` if the input is valid UTF-8 and contains
|
||||
|
@ -699,15 +702,15 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
|
|||
// U+202E: E2 80 AE
|
||||
// U+2067: E2 81 A7
|
||||
//
|
||||
// U+FB4F: EF AD 8F
|
||||
// U+FB50: EF AD 90
|
||||
// U+FB1C: EF AC 9C
|
||||
// U+FB1D: EF AC 9D
|
||||
// U+FDFF: EF B7 BF
|
||||
// U+FE00: EF B8 80
|
||||
//
|
||||
// U+FE6F: EF B9 AF
|
||||
// U+FE70: EF B9 B0
|
||||
// U+FEFE: EF BB BE
|
||||
// U+FEFF: EF BB BF
|
||||
// U+FF00: EF BC 80
|
||||
//
|
||||
// U+107FF: F0 90 9F BF
|
||||
// U+10800: F0 90 A0 80
|
||||
|
@ -797,9 +800,9 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
|
|||
{
|
||||
return true;
|
||||
}
|
||||
if in_inclusive_range8(second, 0xAD, 0xB7) {
|
||||
if second == 0xAD {
|
||||
if third > 0x8F {
|
||||
if in_inclusive_range8(second, 0xAC, 0xB7) {
|
||||
if second == 0xAC {
|
||||
if third > 0x9C {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
|
@ -810,6 +813,10 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
|
|||
if third > 0xAF {
|
||||
return true;
|
||||
}
|
||||
} else if second == 0xBB {
|
||||
if third != 0xBF {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
|
@ -1013,9 +1020,9 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
|
|||
{
|
||||
return true;
|
||||
}
|
||||
if in_inclusive_range8(second, 0xAD, 0xB7) {
|
||||
if second == 0xAD {
|
||||
if third > 0x8F {
|
||||
if in_inclusive_range8(second, 0xAC, 0xB7) {
|
||||
if second == 0xAC {
|
||||
if third > 0x9C {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
|
@ -1026,6 +1033,10 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
|
|||
if third > 0xAF {
|
||||
return true;
|
||||
}
|
||||
} else if second == 0xBB {
|
||||
if third != 0xBF {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
|
@ -1083,11 +1094,14 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
|
|||
/// right-to-left processing.
|
||||
///
|
||||
/// The check is done on a Unicode block basis without regard to assigned
|
||||
/// vs. unassigned code points in the block. Additionally, the four
|
||||
/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
|
||||
/// Control characters that are technically bidi controls but do not cause
|
||||
/// right-to-left behavior without the presence of right-to-left characters
|
||||
/// or right-to-left controls are not checked for.
|
||||
/// vs. unassigned code points in the block. Hebrew presentation forms in
|
||||
/// the Alphabetic Presentation Forms block are treated as if they formed
|
||||
/// a block on their own (i.e. they are treated as right-to-left). Additionally,
|
||||
/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
|
||||
/// for. Control characters that are technically bidi controls but do not
|
||||
/// cause right-to-left behavior without the presence of right-to-left
|
||||
/// characters or right-to-left controls are not checked for. As a special
|
||||
/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
|
||||
#[inline]
|
||||
pub fn is_str_bidi(buffer: &str) -> bool {
|
||||
// U+058F: D6 8F
|
||||
|
@ -1100,15 +1114,15 @@ pub fn is_str_bidi(buffer: &str) -> bool {
|
|||
// U+202E: E2 80 AE
|
||||
// U+2067: E2 81 A7
|
||||
//
|
||||
// U+FB4F: EF AD 8F
|
||||
// U+FB50: EF AD 90
|
||||
// U+FB1C: EF AC 9C
|
||||
// U+FB1D: EF AC 9D
|
||||
// U+FDFF: EF B7 BF
|
||||
// U+FE00: EF B8 80
|
||||
//
|
||||
// U+FE6F: EF B9 AF
|
||||
// U+FE70: EF B9 B0
|
||||
// U+FEFE: EF BB BE
|
||||
// U+FEFF: EF BB BF
|
||||
// U+FF00: EF BC 80
|
||||
//
|
||||
// U+107FF: F0 90 9F BF
|
||||
// U+10800: F0 90 A0 80
|
||||
|
@ -1178,10 +1192,10 @@ pub fn is_str_bidi(buffer: &str) -> bool {
|
|||
}
|
||||
} else {
|
||||
debug_assert_eq!(byte, 0xEF);
|
||||
if in_inclusive_range8(second, 0xAD, 0xB7) {
|
||||
if second == 0xAD {
|
||||
if in_inclusive_range8(second, 0xAC, 0xB7) {
|
||||
if second == 0xAC {
|
||||
let third = bytes[read + 2];
|
||||
if third > 0x8F {
|
||||
if third > 0x9C {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
|
@ -1193,6 +1207,11 @@ pub fn is_str_bidi(buffer: &str) -> bool {
|
|||
if third > 0xAF {
|
||||
return true;
|
||||
}
|
||||
} else if second == 0xBB {
|
||||
let third = bytes[read + 2];
|
||||
if third != 0xBF {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
|
@ -1230,11 +1249,14 @@ pub fn is_str_bidi(buffer: &str) -> bool {
|
|||
/// right-to-left processing.
|
||||
///
|
||||
/// The check is done on a Unicode block basis without regard to assigned
|
||||
/// vs. unassigned code points in the block. Additionally, the four
|
||||
/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
|
||||
/// Control characters that are technically bidi controls but do not cause
|
||||
/// right-to-left behavior without the presence of right-to-left characters
|
||||
/// or right-to-left controls are not checked for.
|
||||
/// vs. unassigned code points in the block. Hebrew presentation forms in
|
||||
/// the Alphabetic Presentation Forms block are treated as if they formed
|
||||
/// a block on their own (i.e. they are treated as right-to-left). Additionally,
|
||||
/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
|
||||
/// for. Control characters that are technically bidi controls but do not
|
||||
/// cause right-to-left behavior without the presence of right-to-left
|
||||
/// characters or right-to-left controls are not checked for. As a special
|
||||
/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
|
||||
///
|
||||
/// Returns `true` if the input contains an RTL character or an unpaired
|
||||
/// high surrogate that could be the high half of an RTL character.
|
||||
|
@ -1248,11 +1270,14 @@ pub fn is_utf16_bidi(buffer: &[u16]) -> bool {
|
|||
/// Checks whether a code point triggers right-to-left processing.
|
||||
///
|
||||
/// The check is done on a Unicode block basis without regard to assigned
|
||||
/// vs. unassigned code points in the block. Additionally, the four
|
||||
/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
|
||||
/// Control characters that are technically bidi controls but do not cause
|
||||
/// right-to-left behavior without the presence of right-to-left characters
|
||||
/// or right-to-left controls are not checked for.
|
||||
/// vs. unassigned code points in the block. Hebrew presentation forms in
|
||||
/// the Alphabetic Presentation Forms block are treated as if they formed
|
||||
/// a block on their own (i.e. they are treated as right-to-left). Additionally,
|
||||
/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
|
||||
/// for. Control characters that are technically bidi controls but do not
|
||||
/// cause right-to-left behavior without the presence of right-to-left
|
||||
/// characters or right-to-left controls are not checked for. As a special
|
||||
/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
|
||||
#[inline(always)]
|
||||
pub fn is_char_bidi(c: char) -> bool {
|
||||
// Controls:
|
||||
|
@ -1266,8 +1291,9 @@ pub fn is_char_bidi(c: char) -> bool {
|
|||
// BMP RTL:
|
||||
// https://www.unicode.org/roadmaps/bmp/
|
||||
// U+0590...U+08FF
|
||||
// U+FB50...U+FDFF Arabic Presentation Forms A
|
||||
// U+FE70...U+FEFF Arabic Presentation Forms B
|
||||
// U+FB1D...U+FDFF Hebrew presentation forms and
|
||||
// Arabic Presentation Forms A
|
||||
// U+FE70...U+FEFE Arabic Presentation Forms B (excl. BOM)
|
||||
//
|
||||
// Supplementary RTL:
|
||||
// https://www.unicode.org/roadmaps/smp/
|
||||
|
@ -1278,8 +1304,8 @@ pub fn is_char_bidi(c: char) -> bool {
|
|||
// Below Hebrew
|
||||
return false;
|
||||
}
|
||||
if in_range32(code_point, 0x0900, 0xFB50) {
|
||||
// Above Arabic Extended-A and below Arabic Presentation Forms
|
||||
if in_range32(code_point, 0x0900, 0xFB1D) {
|
||||
// Above Arabic Extended-A and below Hebrew presentation forms
|
||||
if in_inclusive_range32(code_point, 0x200F, 0x2067) {
|
||||
// In the range that contains the RTL controls
|
||||
return code_point == 0x200F
|
||||
|
@ -1297,8 +1323,8 @@ pub fn is_char_bidi(c: char) -> bool {
|
|||
// Between astral RTL blocks
|
||||
return false;
|
||||
}
|
||||
if in_range32(code_point, 0xFF00, 0x10800) {
|
||||
// Above Arabic Presentations Forms B and below first
|
||||
if in_range32(code_point, 0xFEFF, 0x10800) {
|
||||
// Above Arabic Presentation Forms B (excl. BOM) and below first
|
||||
// astral RTL
|
||||
return false;
|
||||
}
|
||||
|
@ -1312,11 +1338,14 @@ pub fn is_char_bidi(c: char) -> bool {
|
|||
/// Checks whether a UTF-16 code unit triggers right-to-left processing.
|
||||
///
|
||||
/// The check is done on a Unicode block basis without regard to assigned
|
||||
/// vs. unassigned code points in the block. Additionally, the four
|
||||
/// RIGHT-TO-LEFT FOO controls in General Punctuation are checked for.
|
||||
/// Control characters that are technically bidi controls but do not cause
|
||||
/// right-to-left behavior without the presence of right-to-left characters
|
||||
/// or right-to-left controls are not checked for.
|
||||
/// vs. unassigned code points in the block. Hebrew presentation forms in
|
||||
/// the Alphabetic Presentation Forms block are treated as if they formed
|
||||
/// a block on their own (i.e. they are treated as right-to-left). Additionally,
|
||||
/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
|
||||
/// for. Control characters that are technically bidi controls but do not
|
||||
/// cause right-to-left behavior without the presence of right-to-left
|
||||
/// characters or right-to-left controls are not checked for. As a special
|
||||
/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
|
||||
///
|
||||
/// Since supplementary-plane right-to-left blocks are identifiable from the
|
||||
/// high surrogate without examining the low surrogate, this function returns
|
||||
|
@ -1338,8 +1367,8 @@ pub fn is_utf16_code_unit_bidi(u: u16) -> bool {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
if in_range16(u, 0xD83C, 0xFB50) {
|
||||
// Between astral RTL high surrogates and Arabic Presentation Forms
|
||||
if in_range16(u, 0xD83C, 0xFB1D) {
|
||||
// Between astral RTL high surrogates and Hebrew presentation forms
|
||||
// (Emoji is here)
|
||||
return false;
|
||||
}
|
||||
|
@ -1347,8 +1376,8 @@ pub fn is_utf16_code_unit_bidi(u: u16) -> bool {
|
|||
// Between RTL high surrogates
|
||||
return false;
|
||||
}
|
||||
if u > 0xFEFF {
|
||||
// Above Arabic Presentation Forms
|
||||
if u > 0xFEFE {
|
||||
// Above Arabic Presentation Forms (excl. BOM)
|
||||
return false;
|
||||
}
|
||||
if in_range16(u, 0xFE00, 0xFE70) {
|
||||
|
@ -2370,13 +2399,14 @@ mod tests {
|
|||
assert!(!is_char_bidi('\u{1F4A9}'));
|
||||
assert!(!is_char_bidi('\u{FE00}'));
|
||||
assert!(!is_char_bidi('\u{202C}'));
|
||||
assert!(!is_char_bidi('\u{FEFF}'));
|
||||
assert!(is_char_bidi('\u{0590}'));
|
||||
assert!(is_char_bidi('\u{08FF}'));
|
||||
assert!(is_char_bidi('\u{061C}'));
|
||||
assert!(is_char_bidi('\u{FB50}'));
|
||||
assert!(is_char_bidi('\u{FDFF}'));
|
||||
assert!(is_char_bidi('\u{FE70}'));
|
||||
assert!(is_char_bidi('\u{FEFF}'));
|
||||
assert!(is_char_bidi('\u{FEFE}'));
|
||||
assert!(is_char_bidi('\u{200F}'));
|
||||
assert!(is_char_bidi('\u{202B}'));
|
||||
assert!(is_char_bidi('\u{202E}'));
|
||||
|
@ -2395,13 +2425,15 @@ mod tests {
|
|||
assert!(!is_utf16_code_unit_bidi(0xD801));
|
||||
assert!(!is_utf16_code_unit_bidi(0xFE00));
|
||||
assert!(!is_utf16_code_unit_bidi(0x202C));
|
||||
assert!(!is_utf16_code_unit_bidi(0xFEFF));
|
||||
assert!(is_utf16_code_unit_bidi(0x0590));
|
||||
assert!(is_utf16_code_unit_bidi(0x08FF));
|
||||
assert!(is_utf16_code_unit_bidi(0x061C));
|
||||
assert!(is_utf16_code_unit_bidi(0xFB1D));
|
||||
assert!(is_utf16_code_unit_bidi(0xFB50));
|
||||
assert!(is_utf16_code_unit_bidi(0xFDFF));
|
||||
assert!(is_utf16_code_unit_bidi(0xFE70));
|
||||
assert!(is_utf16_code_unit_bidi(0xFEFF));
|
||||
assert!(is_utf16_code_unit_bidi(0xFEFE));
|
||||
assert!(is_utf16_code_unit_bidi(0x200F));
|
||||
assert!(is_utf16_code_unit_bidi(0x202B));
|
||||
assert!(is_utf16_code_unit_bidi(0x202E));
|
||||
|
@ -2420,13 +2452,14 @@ mod tests {
|
|||
assert!(!is_str_bidi("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop"));
|
||||
assert!(!is_str_bidi("abcdefghijklmnop\u{FE00}abcdefghijklmnop"));
|
||||
assert!(!is_str_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop"));
|
||||
assert!(!is_str_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{08FF}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{061C}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{FB50}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{FDFF}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{FE70}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{FEFE}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{200F}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{202B}abcdefghijklmnop"));
|
||||
assert!(is_str_bidi("abcdefghijklmnop\u{202E}abcdefghijklmnop"));
|
||||
|
@ -2457,6 +2490,9 @@ mod tests {
|
|||
assert!(!is_utf8_bidi(
|
||||
"abcdefghijklmnop\u{202C}abcdefghijklmnop".as_bytes()
|
||||
));
|
||||
assert!(!is_utf8_bidi(
|
||||
"abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()
|
||||
));
|
||||
assert!(is_utf8_bidi(
|
||||
"abcdefghijklmnop\u{0590}abcdefghijklmnop".as_bytes()
|
||||
));
|
||||
|
@ -2476,7 +2512,7 @@ mod tests {
|
|||
"abcdefghijklmnop\u{FE70}abcdefghijklmnop".as_bytes()
|
||||
));
|
||||
assert!(is_utf8_bidi(
|
||||
"abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()
|
||||
"abcdefghijklmnop\u{FEFE}abcdefghijklmnop".as_bytes()
|
||||
));
|
||||
assert!(is_utf8_bidi(
|
||||
"abcdefghijklmnop\u{200F}abcdefghijklmnop".as_bytes()
|
||||
|
@ -2530,6 +2566,10 @@ mod tests {
|
|||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202C, 0x62, 0x63, 0x64, 0x65, 0x66,
|
||||
0x67, 0x68, 0x69,
|
||||
]));
|
||||
assert!(!is_utf16_bidi(&[
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65, 0x66,
|
||||
0x67, 0x68, 0x69,
|
||||
]));
|
||||
assert!(is_utf16_bidi(&[
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x62, 0x63, 0x64, 0x65, 0x66,
|
||||
0x67, 0x68, 0x69,
|
||||
|
@ -2542,6 +2582,10 @@ mod tests {
|
|||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x061C, 0x62, 0x63, 0x64, 0x65, 0x66,
|
||||
0x67, 0x68, 0x69,
|
||||
]));
|
||||
assert!(is_utf16_bidi(&[
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB1D, 0x62, 0x63, 0x64, 0x65, 0x66,
|
||||
0x67, 0x68, 0x69,
|
||||
]));
|
||||
assert!(is_utf16_bidi(&[
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB50, 0x62, 0x63, 0x64, 0x65, 0x66,
|
||||
0x67, 0x68, 0x69,
|
||||
|
@ -2555,7 +2599,7 @@ mod tests {
|
|||
0x67, 0x68, 0x69,
|
||||
]));
|
||||
assert!(is_utf16_bidi(&[
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65, 0x66,
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFE, 0x62, 0x63, 0x64, 0x65, 0x66,
|
||||
0x67, 0x68, 0x69,
|
||||
]));
|
||||
assert!(is_utf16_bidi(&[
|
||||
|
@ -2623,6 +2667,10 @@ mod tests {
|
|||
check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop"),
|
||||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_ne!(
|
||||
check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"),
|
||||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_eq!(
|
||||
check_str_for_latin1_and_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop"),
|
||||
Latin1Bidi::Bidi
|
||||
|
@ -2648,7 +2696,7 @@ mod tests {
|
|||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_eq!(
|
||||
check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"),
|
||||
check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFE}abcdefghijklmnop"),
|
||||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_eq!(
|
||||
|
@ -2711,6 +2759,10 @@ mod tests {
|
|||
check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop".as_bytes()),
|
||||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_ne!(
|
||||
check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()),
|
||||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_eq!(
|
||||
check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop".as_bytes()),
|
||||
Latin1Bidi::Bidi
|
||||
|
@ -2736,7 +2788,7 @@ mod tests {
|
|||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_eq!(
|
||||
check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()),
|
||||
check_utf8_for_latin1_and_bidi("abcdefghijklmnop\u{FEFE}abcdefghijklmnop".as_bytes()),
|
||||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_eq!(
|
||||
|
@ -2817,6 +2869,13 @@ mod tests {
|
|||
]),
|
||||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_ne!(
|
||||
check_utf16_for_latin1_and_bidi(&[
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65,
|
||||
0x66, 0x67, 0x68, 0x69,
|
||||
]),
|
||||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_eq!(
|
||||
check_utf16_for_latin1_and_bidi(&[
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x62, 0x63, 0x64, 0x65,
|
||||
|
@ -2838,6 +2897,13 @@ mod tests {
|
|||
]),
|
||||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_eq!(
|
||||
check_utf16_for_latin1_and_bidi(&[
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB1D, 0x62, 0x63, 0x64, 0x65,
|
||||
0x66, 0x67, 0x68, 0x69,
|
||||
]),
|
||||
Latin1Bidi::Bidi
|
||||
);
|
||||
assert_eq!(
|
||||
check_utf16_for_latin1_and_bidi(&[
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB50, 0x62, 0x63, 0x64, 0x65,
|
||||
|
@ -2861,7 +2927,7 @@ mod tests {
|
|||
);
|
||||
assert_eq!(
|
||||
check_utf16_for_latin1_and_bidi(&[
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65,
|
||||
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFE, 0x62, 0x63, 0x64, 0x65,
|
||||
0x66, 0x67, 0x68, 0x69,
|
||||
]),
|
||||
Latin1Bidi::Bidi
|
||||
|
@ -2936,8 +3002,8 @@ mod tests {
|
|||
pub fn reference_is_char_bidi(c: char) -> bool {
|
||||
match c {
|
||||
'\u{0590}'...'\u{08FF}'
|
||||
| '\u{FB50}'...'\u{FDFF}'
|
||||
| '\u{FE70}'...'\u{FEFF}'
|
||||
| '\u{FB1D}'...'\u{FDFF}'
|
||||
| '\u{FE70}'...'\u{FEFE}'
|
||||
| '\u{10800}'...'\u{10FFF}'
|
||||
| '\u{1E800}'...'\u{1EFFF}'
|
||||
| '\u{200F}'
|
||||
|
@ -2952,8 +3018,8 @@ mod tests {
|
|||
pub fn reference_is_utf16_code_unit_bidi(u: u16) -> bool {
|
||||
match u {
|
||||
0x0590...0x08FF
|
||||
| 0xFB50...0xFDFF
|
||||
| 0xFE70...0xFEFF
|
||||
| 0xFB1D...0xFDFF
|
||||
| 0xFE70...0xFEFE
|
||||
| 0xD802
|
||||
| 0xD803
|
||||
| 0xD83A
|
||||
|
@ -3049,6 +3115,19 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_utf16_bidi_thoroughly() {
|
||||
let mut buf = [0; 32];
|
||||
for i in 0..0x10000u32 {
|
||||
let u = i as u16;
|
||||
buf[15] = u;
|
||||
assert_eq!(
|
||||
is_utf16_bidi(&buf[..]),
|
||||
reference_is_utf16_code_unit_bidi(u)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_utf8_bidi_edge_cases() {
|
||||
assert!(!is_utf8_bidi(b"\xD5\xBF\x61"));
|
||||
|
|
|
@ -278,8 +278,8 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool {
|
|||
// Quick refutation failed. Let's do the full check.
|
||||
|
||||
(in_range16x8!(s, 0x0590, 0x0900)
|
||||
| in_range16x8!(s, 0xFB50, 0xFE00)
|
||||
| in_range16x8!(s, 0xFE70, 0xFF00)
|
||||
| in_range16x8!(s, 0xFB1D, 0xFE00)
|
||||
| in_range16x8!(s, 0xFE70, 0xFEFF)
|
||||
| in_range16x8!(s, 0xD802, 0xD804)
|
||||
| in_range16x8!(s, 0xD83A, 0xD83C)
|
||||
| s.eq(u16x8::splat(0x200F))
|
||||
|
|
|
@ -401,4 +401,58 @@ mod tests {
|
|||
assert_eq!(output[0], 0xFFFD);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_utf_16le_decode_near_end() {
|
||||
let mut output = [0u8; 4];
|
||||
let mut decoder = UTF_16LE.new_decoder();
|
||||
{
|
||||
let (result, read, written, had_errors) =
|
||||
decoder.decode_to_utf8(&[0x03], &mut output[..], false);
|
||||
assert_eq!(result, CoderResult::InputEmpty);
|
||||
assert_eq!(read, 1);
|
||||
assert_eq!(written, 0);
|
||||
assert!(!had_errors);
|
||||
assert_eq!(output[0], 0x0);
|
||||
}
|
||||
{
|
||||
let (result, read, written, had_errors) =
|
||||
decoder.decode_to_utf8(&[0x26, 0x03, 0x26], &mut output[..], false);
|
||||
assert_eq!(result, CoderResult::OutputFull);
|
||||
assert_eq!(read, 1);
|
||||
assert_eq!(written, 3);
|
||||
assert!(!had_errors);
|
||||
assert_eq!(output[0], 0xE2);
|
||||
assert_eq!(output[1], 0x98);
|
||||
assert_eq!(output[2], 0x83);
|
||||
assert_eq!(output[3], 0x00);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_utf_16be_decode_near_end() {
|
||||
let mut output = [0u8; 4];
|
||||
let mut decoder = UTF_16BE.new_decoder();
|
||||
{
|
||||
let (result, read, written, had_errors) =
|
||||
decoder.decode_to_utf8(&[0x26], &mut output[..], false);
|
||||
assert_eq!(result, CoderResult::InputEmpty);
|
||||
assert_eq!(read, 1);
|
||||
assert_eq!(written, 0);
|
||||
assert!(!had_errors);
|
||||
assert_eq!(output[0], 0x0);
|
||||
}
|
||||
{
|
||||
let (result, read, written, had_errors) =
|
||||
decoder.decode_to_utf8(&[0x03, 0x26, 0x03], &mut output[..], false);
|
||||
assert_eq!(result, CoderResult::OutputFull);
|
||||
assert_eq!(read, 1);
|
||||
assert_eq!(written, 3);
|
||||
assert!(!had_errors);
|
||||
assert_eq!(output[0], 0xE2);
|
||||
assert_eq!(output[1], 0x98);
|
||||
assert_eq!(output[2], 0x83);
|
||||
assert_eq!(output[3], 0x00);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче