gecko-dev/third_party/rust/ucd-util/src/hangul.rs

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

106 строки
2.9 KiB
Rust
Исходник Обычный вид История

Bug 1463251 - Part 2. Revendor dependencies. r=ato,jgraham MozReview-Commit-ID: 6eeqvTqSxam --HG-- rename : third_party/rust/memchr/.cargo-checksum.json => third_party/rust/memchr-1.0.2/.cargo-checksum.json rename : third_party/rust/memchr/Cargo.toml => third_party/rust/memchr-1.0.2/Cargo.toml rename : third_party/rust/memchr/src/lib.rs => third_party/rust/memchr-1.0.2/src/lib.rs rename : third_party/rust/regex/.cargo-checksum.json => third_party/rust/regex-0.2.2/.cargo-checksum.json rename : third_party/rust/regex/.travis.yml => third_party/rust/regex-0.2.2/.travis.yml rename : third_party/rust/regex/CHANGELOG.md => third_party/rust/regex-0.2.2/CHANGELOG.md rename : third_party/rust/regex/Cargo.toml => third_party/rust/regex-0.2.2/Cargo.toml rename : third_party/rust/regex/HACKING.md => third_party/rust/regex-0.2.2/HACKING.md rename : third_party/rust/regex/PERFORMANCE.md => third_party/rust/regex-0.2.2/PERFORMANCE.md rename : third_party/rust/regex/README.md => third_party/rust/regex-0.2.2/README.md rename : third_party/rust/regex/appveyor.yml => third_party/rust/regex-0.2.2/appveyor.yml rename : third_party/rust/regex/ci/after_success.sh => third_party/rust/regex-0.2.2/ci/after_success.sh rename : third_party/rust/regex/ci/run-kcov => third_party/rust/regex-0.2.2/ci/run-kcov rename : third_party/rust/regex/ci/script.sh => third_party/rust/regex-0.2.2/ci/script.sh rename : third_party/rust/regex/examples/bug347.rs => third_party/rust/regex-0.2.2/examples/bug347.rs rename : third_party/rust/regex/examples/shootout-regex-dna-bytes.rs => third_party/rust/regex-0.2.2/examples/shootout-regex-dna-bytes.rs rename : third_party/rust/regex/examples/shootout-regex-dna-single.rs => third_party/rust/regex-0.2.2/examples/shootout-regex-dna-single.rs rename : third_party/rust/regex/examples/shootout-regex-dna.rs => third_party/rust/regex-0.2.2/examples/shootout-regex-dna.rs rename : third_party/rust/regex/examples/shootout-regex-redux-1.rs => 
third_party/rust/regex-0.2.2/examples/shootout-regex-redux-1.rs rename : third_party/rust/regex/examples/shootout-regex-redux-chunked.rs => third_party/rust/regex-0.2.2/examples/shootout-regex-redux-chunked.rs rename : third_party/rust/regex/examples/shootout-regex-redux.rs => third_party/rust/regex-0.2.2/examples/shootout-regex-redux.rs rename : third_party/rust/regex/scripts/unicode.py => third_party/rust/regex-0.2.2/scripts/unicode.py rename : third_party/rust/regex/src/backtrack.rs => third_party/rust/regex-0.2.2/src/backtrack.rs rename : third_party/rust/regex/src/compile.rs => third_party/rust/regex-0.2.2/src/compile.rs rename : third_party/rust/regex/src/dfa.rs => third_party/rust/regex-0.2.2/src/dfa.rs rename : third_party/rust/regex/src/error.rs => third_party/rust/regex-0.2.2/src/error.rs rename : third_party/rust/regex/src/exec.rs => third_party/rust/regex-0.2.2/src/exec.rs rename : third_party/rust/regex/src/expand.rs => third_party/rust/regex-0.2.2/src/expand.rs rename : third_party/rust/regex/src/input.rs => third_party/rust/regex-0.2.2/src/input.rs rename : third_party/rust/regex/src/lib.rs => third_party/rust/regex-0.2.2/src/lib.rs rename : third_party/rust/regex/src/literals.rs => third_party/rust/regex-0.2.2/src/literals.rs rename : third_party/rust/regex/src/pikevm.rs => third_party/rust/regex-0.2.2/src/pikevm.rs rename : third_party/rust/regex/src/prog.rs => third_party/rust/regex-0.2.2/src/prog.rs rename : third_party/rust/regex/src/re_builder.rs => third_party/rust/regex-0.2.2/src/re_builder.rs rename : third_party/rust/regex/src/re_bytes.rs => third_party/rust/regex-0.2.2/src/re_bytes.rs rename : third_party/rust/regex/src/re_plugin.rs => third_party/rust/regex-0.2.2/src/re_plugin.rs rename : third_party/rust/regex/src/re_set.rs => third_party/rust/regex-0.2.2/src/re_set.rs rename : third_party/rust/regex/src/re_trait.rs => third_party/rust/regex-0.2.2/src/re_trait.rs rename : third_party/rust/regex/src/re_unicode.rs => 
third_party/rust/regex-0.2.2/src/re_unicode.rs rename : third_party/rust/regex/src/simd_accel/mod.rs => third_party/rust/regex-0.2.2/src/simd_accel/mod.rs rename : third_party/rust/regex/src/simd_accel/teddy128.rs => third_party/rust/regex-0.2.2/src/simd_accel/teddy128.rs rename : third_party/rust/regex/src/simd_fallback/mod.rs => third_party/rust/regex-0.2.2/src/simd_fallback/mod.rs rename : third_party/rust/regex/src/simd_fallback/teddy128.rs => third_party/rust/regex-0.2.2/src/simd_fallback/teddy128.rs rename : third_party/rust/regex/src/utf8.rs => third_party/rust/regex-0.2.2/src/utf8.rs rename : third_party/rust/regex/tests/api_str.rs => third_party/rust/regex-0.2.2/tests/api_str.rs rename : third_party/rust/regex/tests/bytes.rs => third_party/rust/regex-0.2.2/tests/bytes.rs rename : third_party/rust/regex/tests/crazy.rs => third_party/rust/regex-0.2.2/tests/crazy.rs rename : third_party/rust/regex/tests/macros.rs => third_party/rust/regex-0.2.2/tests/macros.rs rename : third_party/rust/regex/tests/macros_bytes.rs => third_party/rust/regex-0.2.2/tests/macros_bytes.rs rename : third_party/rust/regex/tests/macros_str.rs => third_party/rust/regex-0.2.2/tests/macros_str.rs rename : third_party/rust/regex/tests/noparse.rs => third_party/rust/regex-0.2.2/tests/noparse.rs rename : third_party/rust/regex/tests/plugin.rs => third_party/rust/regex-0.2.2/tests/plugin.rs rename : third_party/rust/regex/tests/regression.rs => third_party/rust/regex-0.2.2/tests/regression.rs rename : third_party/rust/regex/tests/replace.rs => third_party/rust/regex-0.2.2/tests/replace.rs rename : third_party/rust/regex/tests/test_default.rs => third_party/rust/regex-0.2.2/tests/test_default.rs rename : third_party/rust/regex/tests/test_default_bytes.rs => third_party/rust/regex-0.2.2/tests/test_default_bytes.rs rename : third_party/rust/regex/tests/test_plugin.rs => third_party/rust/regex-0.2.2/tests/test_plugin.rs rename : third_party/rust/regex/tests/unicode.rs => 
third_party/rust/regex-0.2.2/tests/unicode.rs rename : third_party/rust/regex/tests/word_boundary_unicode.rs => third_party/rust/regex-0.2.2/tests/word_boundary_unicode.rs rename : third_party/rust/regex-syntax/.cargo-checksum.json => third_party/rust/regex-syntax-0.4.1/.cargo-checksum.json rename : third_party/rust/regex-syntax/Cargo.toml => third_party/rust/regex-syntax-0.4.1/Cargo.toml rename : third_party/rust/regex-syntax/src/lib.rs => third_party/rust/regex-syntax-0.4.1/src/lib.rs rename : third_party/rust/regex-syntax/src/literals.rs => third_party/rust/regex-syntax-0.4.1/src/literals.rs rename : third_party/rust/regex-syntax/src/parser.rs => third_party/rust/regex-syntax-0.4.1/src/parser.rs rename : third_party/rust/regex-syntax/src/properties.rs => third_party/rust/regex-syntax-0.4.1/src/properties.rs rename : third_party/rust/regex-syntax/src/unicode.rs => third_party/rust/regex-syntax-0.4.1/src/unicode.rs extra : rebase_source : b0c643eb68b9262945345145ec1578c14369dbf7
2018-05-21 23:34:18 +03:00
use unicode_tables::jamo_short_name::JAMO_SHORT_NAME;
// This implementation follows the Hangul syllable algorithms described in
// section 3.12 ("Conjoining Jamo Behavior") of the Unicode Standard.
/// The inclusive codepoint ranges that together cover every precomposed
/// Hangul syllable.
///
/// There is exactly one such range, `AC00..D7A3`. See Unicode 4.8
/// Table 4-13.
pub const RANGE_HANGUL_SYLLABLE: &'static [(u32, u32)] = &[(0xAC00, 0xD7A3)];
// Constants of the Hangul syllable arithmetic. The names mirror `SBase`,
// `LBase`, `VBase`, `TBase`, `VCount`, `TCount` and `NCount` from the
// Unicode Standard.

/// Codepoint of the first precomposed Hangul syllable.
const S_BASE: u32 = 0xAC00;
/// Codepoint of the first leading consonant jamo; the leading consonant
/// index of a syllable is added to this.
const L_BASE: u32 = 0x1100;
/// Codepoint of the first vowel jamo; the vowel index of a syllable is added
/// to this.
const V_BASE: u32 = 0x1161;
/// One less than the codepoint of the first trailing consonant jamo. A
/// trailing index of zero means "no trailing consonant", so real trailing
/// consonants start at `T_BASE + 1`.
const T_BASE: u32 = 0x11A7;
/// Number of vowel jamo that can appear in a precomposed syllable.
const V_COUNT: u32 = 21;
/// Number of trailing consonant choices per (leading, vowel) pair, counting
/// the "no trailing consonant" case.
const T_COUNT: u32 = 28;
/// Number of precomposed syllables that share one leading consonant.
const N_COUNT: u32 = V_COUNT * T_COUNT;
/// Return the character name of the given precomposed Hangul codepoint.
///
/// The name is the prefix `HANGUL SYLLABLE ` followed by the short names of
/// the leading consonant, the vowel and, when the syllable has one, the
/// trailing consonant.
///
/// If the given codepoint does not correspond to a precomposed Hangul
/// codepoint in the inclusive range `AC00..D7A3`, then this returns `None`.
///
/// This implements the algorithms described in Unicode 3.12 and Unicode 4.8.
pub fn hangul_name(cp: u32) -> Option<String> {
    const PREFIX: &'static str = "HANGUL SYLLABLE ";

    // Decompose before building anything so that an invalid codepoint is
    // rejected without touching the heap.
    let (lpart, vpart, tpart) = match hangul_full_canonical_decomposition(cp) {
        None => return None,
        Some(triple) => triple,
    };
    let leading = jamo_short_name(lpart);
    let vowel = jamo_short_name(vpart);
    // A missing trailing consonant contributes nothing to the name.
    let trailing = tpart.map_or("", jamo_short_name);

    // All pieces are known at this point, so allocate exactly once.
    let mut name = String::with_capacity(
        PREFIX.len() + leading.len() + vowel.len() + trailing.len(),
    );
    name.push_str(PREFIX);
    name.push_str(leading);
    name.push_str(vowel);
    name.push_str(trailing);
    Some(name)
}
/// Split a precomposed Hangul syllable into its constituent jamo.
///
/// On success the result is `(leading, vowel, trailing)`, each element being
/// a jamo codepoint. `trailing` is `None` for syllables that have no
/// trailing consonant.
///
/// Returns `None` when `cp` lies outside the inclusive range `AC00..D7A3` of
/// precomposed Hangul syllables.
///
/// This implements the algorithms described in Unicode 3.12 and Unicode 4.8.
pub fn hangul_full_canonical_decomposition(
    cp: u32,
) -> Option<(u32, u32, Option<u32>)> {
    // Guard: only precomposed syllables have an arithmetic decomposition.
    // This also guarantees that the subtraction below cannot underflow.
    if cp < 0xAC00 || cp > 0xD7A3 {
        return None;
    }

    // Position of the syllable within the precomposed block.
    let offset = cp - S_BASE;

    let leading = L_BASE + offset / N_COUNT;
    let vowel = V_BASE + (offset % N_COUNT) / T_COUNT;
    // A trailing index of zero encodes "no trailing consonant".
    let trailing = match offset % T_COUNT {
        0 => None,
        t => Some(T_BASE + t),
    };

    Some((leading, vowel, trailing))
}
/// Look up the short name of a jamo codepoint in `JAMO_SHORT_NAME`.
///
/// The table is binary-searched on the codepoint stored in the first element
/// of each entry (so it is assumed to be sorted by codepoint), and the second
/// element of the matching entry is returned.
///
/// # Panics
///
/// Panics if `cp` has no entry in the table.
fn jamo_short_name(cp: u32) -> &'static str {
    JAMO_SHORT_NAME
        .binary_search_by_key(&cp, |entry| entry.0)
        .map(|idx| JAMO_SHORT_NAME[idx].1)
        .unwrap()
}
#[cfg(test)]
mod tests {
    use super::{hangul_name, hangul_full_canonical_decomposition};

    // U+D4DB has a leading consonant, a vowel and a trailing consonant.
    #[test]
    fn canon_decomp() {
        assert_eq!(
            hangul_full_canonical_decomposition(0xD4DB),
            Some((0x1111, 0x1171, Some(0x11B6))));
    }

    // U+AC00 is the first syllable and has no trailing consonant.
    #[test]
    fn canon_decomp_no_trailing() {
        assert_eq!(
            hangul_full_canonical_decomposition(0xAC00),
            Some((0x1100, 0x1161, None)));
    }

    // U+D7A3 is the last syllable: every index is at its maximum.
    #[test]
    fn canon_decomp_last() {
        assert_eq!(
            hangul_full_canonical_decomposition(0xD7A3),
            Some((0x1112, 0x1175, Some(0x11C2))));
    }

    #[test]
    fn name() {
        assert_eq!(hangul_name(0xD4DB).unwrap(), "HANGUL SYLLABLE PWILH");
    }

    #[test]
    fn name_boundaries() {
        assert_eq!(hangul_name(0xAC00).unwrap(), "HANGUL SYLLABLE GA");
        assert_eq!(hangul_name(0xD7A3).unwrap(), "HANGUL SYLLABLE HIH");
    }

    // Every syllable in the block must resolve to a name without panicking.
    #[test]
    fn all() {
        for cp in 0xAC00..(0xD7A3 + 1) {
            hangul_name(cp).unwrap();
        }
    }

    #[test]
    fn invalid() {
        assert!(hangul_name(0).is_none());
    }

    // The codepoints immediately outside the block must be rejected.
    #[test]
    fn invalid_just_outside_range() {
        assert!(hangul_name(0xABFF).is_none());
        assert!(hangul_name(0xD7A4).is_none());
        assert!(hangul_full_canonical_decomposition(0xABFF).is_none());
        assert!(hangul_full_canonical_decomposition(0xD7A4).is_none());
    }
}