Bug 1716518 - Upgrade regex to v1.5.4 and regex-syntax to v0.6.25. r=emilio

This removes thread_local.

Differential Revision: https://phabricator.services.mozilla.com/D117843
Mike Hommey 2021-06-15 22:17:31 +00:00
Parent 3a91162145
Commit 8eadacb968
99 changed files with 3765 additions and 4141 deletions

18
Cargo.lock generated
View file

@ -4178,21 +4178,20 @@ dependencies = [
[[package]]
name = "regex"
version = "1.3.3"
version = "1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5508c1941e4e7cb19965abef075d35a9a8b5cdf0846f30b4050e9b55dc55e87"
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"thread_local",
]
[[package]]
name = "regex-syntax"
version = "0.6.12"
version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "remote"
@ -5089,15 +5088,6 @@ dependencies = [
"syn",
]
[[package]]
name = "thread_local"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
dependencies = [
"lazy_static",
]
[[package]]
name = "threadbound"
version = "0.1.0"

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"ba410e4d856743cb87fa471d2ba2e3b14cd35aa816a04a213463fc9c6b9a2111","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"7e9a134c72b09540e6f81f02133c5ae7d35067ea6fec44a94a1ebd20af47b151","benches/bench.rs":"f04160a876ee69bc9938bf51227513d6dbf3608643bc8ae422200f7ffc5ca85f","src/ast/mod.rs":"6eb24ba078c25ed59ceefa2d57e6c1b4c621d87d327335ea9a03049f0d4b5d44","src/ast/parse.rs":"a109a7d3ef793fb57277b24bd7791357879836231e322405a52348487433927b","src/ast/print.rs":"b075392a6d5b48713e09aa510199bb7380aca2cf09fa4bc0efb5c49782630dda","src/ast/visitor.rs":"fb1489ed5ce019091dde244acd8b027e391be442aef3a9033c785c81a4c251fb","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"a6904d4081379b96853570e32e0aa82d84fc49d5059c48cf801566ff483bedb1","src/hir/interval.rs":"fcd0babe3bddbe411e04adff7f4d8855db1d6aaa7d8e2180bba819abad576736","src/hir/literal/mod.rs":"b0a01a3d7e524277ada88d5b58efcced498a53addfac69355fcc368c4c4dfb0f","src/hir/mod.rs":"73c4cbb48dead01bd03e2da54e1685b620be3fe4b062a9f5b76a93a3bb52236c","src/hir/print.rs":"1bd12a70e5876d85eb2188d83c4b71c9533dc6fff0c1ab5c2b0e4701de7e7a90","src/hir/translate.rs":"bdc82b7aa6e71b9a12092e5e136f4cb2f4965c13183001e7994724bf39eafa8f","src/hir/visitor.rs":"203dbe93e4a8cde395c6ff5a0eb98c9c3737bc5ea11fe7163f5e7bf1babc1f69","src/lib.rs":"db6fd6a65ea30a5b3b1b45c68c17f521302d3408bfe4cec77115913dd25ae072","src/parser.rs":"10cc145d79c275c7e19b8cc9078754f23fc1da9a2c3a2e56041a8616d5f85dea","src/unicode.rs":"873e817b3a8bff11b69eb5053b55f6d1a5f9357758d9e2aedfdc7d833b817c80","src/unicode_tables/LICENSE-UNICODE":"74db5baf44a41b1000312c673544b3374e4198af5605c7f9080a402cec42cfa3","src/unicode_tables/age.rs":"752194f2cb98c483cd98affcbface39431b8039645cc59e8f584a8dde34b34ff","src/unicode_tables/case_folding_simple.rs":"5f4fa71e8abdd01a711247b2c00b46cb4b12e0139b1abcee4be557d127e705fb","src/unicode_tables/general_category.rs":"59423c66260e21c505a901507d6bdd4288f1a1d76362bfae7d7478b943894fe5","src/unicode_tables/grapheme_cluster_break.rs":"d40127918f6015c46b6060c387a5fc2ee083f8d4c2e5ece5bff57ea1d6d031ef","src/unicode_tables/mod.rs":"26c837099cd934c8062e24bc9a0aaecf15fe1de03f9c6da3f3e1e5ac3ca24bee","src/unicode_tables/perl_decimal.rs":"f3ea734f43b123996f8a2c66d54c2b70ded9d333e2e8338bf895ef0f9ec7578e","src/unicode_tables/perl_space.rs":"3304ab6555e950198f9b1714c9a293c7ad80659c2389edb6b56df174a7d317e5","src/unicode_tables/perl_word.rs":"9b493901ebd3d80ed7b26389e9f2a244108ab7eb8a219418e19d5dc040ff52b1","src/unicode_tables/property_bool.rs":"2ae8df389456d0267cc7198420fdd16446f0a5bccda3a73a755a3763975695e2","src/unicode_tables/property_names.rs":"849ff2209af572ef3edeb8a84653098bd38c2497a06758a92ef798b7ffbfb4c8","src/unicode_tables/property_values.rs":"2af9239fbb3ec2458b17a7ed16f3a27a11ae574ee6c9366d3b6768e0560ba134","src/unicode_tables/script.rs":"3cb7442ee2460dce4ab7790801429408e55d8c3b19eac8b32560d693710a7533","src/unicode_tables/script_extension.rs":"71e9dd03f311945225540b2d984a17d224edf051069e31bf834a07382135bf7d","src/unicode_tables/sentence_break.rs":"314401cbbb1afb77b1b2ebcc0e44cb0e4cb7571469d288336d43812c4eeb3d90","src/unicode_tables/word_break.rs":"d03974a4b557670c9d0ac7a3e46175f036bcd258cd188e94af5798dea48cf082","src/utf8.rs":"68353f4303364d058426311893c786ea4b89076978abd11448e5bb4b8cc04a29","test":"9d0bc68616c249f317e783e5083102d2645a6ade3de735e8d8a414e97eaa76d0"},"package":"11a7e20d1cce64ef2
fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716"}
{"files":{"Cargo.toml":"f15a235fff5192b488e6259ed785c77cdab87f77ce17de1c91c997c622379722","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"7e9a134c72b09540e6f81f02133c5ae7d35067ea6fec44a94a1ebd20af47b151","benches/bench.rs":"d2b6ae5b939abd6093064f144b981b7739d7f474ec0698a1268052fc92406635","src/ast/mod.rs":"b0fe9af7ae15d9448246c204977634e5827bbae247bd59ab2e61411996fc68d6","src/ast/parse.rs":"ed3badf248937c81d280f3f3e7573264e3d3587300bcf959579c42c47518d929","src/ast/print.rs":"521d7abeec518f85fe47803347348ebf08364308ebfa614b5eb471c84af43670","src/ast/visitor.rs":"8ffcad13eb2c2a2f745f7bc8d823bd2f0bb728bd150f439455be5a245731f1d2","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"cc99a11392b52f7665ff5ee8ea350f7386ed7c6c6bedd46e216b2f396785317f","src/hir/interval.rs":"2ffab258f204fe47bc5fe9ca84376fcd9ecb4929649f683a9412f2e382e908dc","src/hir/literal/mod.rs":"79aa42009de070058a6388e587bfaa98706f8dd61ee1dca70f23d440f5d8bb70","src/hir/mod.rs":"325dc1e42eb8fb9daeb7a8a5e7f967fdee745a7a7c5e26c20dec0b6c66109ad7","src/hir/print.rs":"ab45ccdb61e32561e246cb564414cd9d0477900bd07b0fba13ef02db8973d8b3","src/hir/translate.rs":"4c595d2faee09aecfdafe5871e7b5b698159d846e3262cf694e6e0a59e8e6a5f","src/hir/visitor.rs":"e5bf7f8c09f6155e59c9d676fe25437f7e3700f9bf5d91101d7e246a64c11d5a","src/lib.rs":"0fc94332a971691862ca17531881302b10ef6fa4aba65c123f0b69ffb14b989a","src/parser.rs":"e45755fcdcc8e5c40c4ecfab34962652fe46ad4f23d445f90885c3c36969c8f7","src/unicode.rs":"3b486b36e2ffcae306cb6d7387a82069163c7269597ff2b50589a05462464c36","src/unicode_tables/LICENSE-UNICODE":"74db5baf44a41b1000312c673544b3374e4198af5605c7f9080a402cec42cfa3","src/unicode_tables/age.rs":"b0932a020d3386478dd2f4839c59e30c525e8591735052b9e791e1ce3a2e2b72","src/unicode_tables/case_folding_simple.rs":"6d1f3d095132639228faf4806d05308c70ce2baa68cce69dca01ea159c4eaa15","src/unicode_tables/general_category.rs":"d21877600d387b8a0c5fbb0942458d0330c69aad6565e28134b8a1a371d2f4f4","src/unicode_tables/grapheme_cluster_break.rs":"f03a8be4a00265b568ca2a41d99f66a5d0f5fb423cb4113494153423a1123cda","src/unicode_tables/mod.rs":"26c837099cd934c8062e24bc9a0aaecf15fe1de03f9c6da3f3e1e5ac3ca24bee","src/unicode_tables/perl_decimal.rs":"e39a5934b504eb3282ccb26bbf50ecd764e720120eb7cf6c43662a2321665ab5","src/unicode_tables/perl_space.rs":"014e5d92b66730557e408c2d5c9b2f46d3d288aa85400ab9193c218c7b98ad21","src/unicode_tables/perl_word.rs":"ddf126f39171776ef83151d7a0dbc41da8dd09186723211fb966c4b304247a5e","src/unicode_tables/property_bool.rs":"21f72bd9f3955e3443549ef6609418817ae6df3c81fb5be90a0ceee9d7d3002d","src/unicode_tables/property_names.rs":"504ea44604cd15a7e827a89066bb81a847dd5c57cef360d9f4a914cf22afcf36","src/unicode_tables/property_values.rs":"4d793ad1b664c1913db146897c8eb4fa29d181b821f096de90dc889b738edb88","src/unicode_tables/script.rs":"5a7d2a958b93056081b8b2eb87c3a5609579ad791ad5b0c42959362ce6ea5b31","src/unicode_tables/script_extension.rs":"1d5f1985f7dcae833e78c3858231666b535bf60e032cfacc09d014c22bda6690","src/unicode_tables/sentence_break.rs":"cd5f0eb7ab6b0ec1c1fb4d78496dfecd691d0d0b76495538b9f376645a753deb","src/unicode_tables/word_break.rs":"eabeacfde7558cfe7b1556b0221f09c65f049de0b08c7cd464c1669040610a6b","src/utf8.rs":"f145b2cb0324e6a39260db685fdf2d88675dead54c5b808fb1b7f73a4b530d66","test":"8a9bd1bd9fb389e08288f951319a9bbb0d4c5284a2ba63cbdab7f6afa2c2f76e"},"package":"f497285884f3fcff4
24ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"}

3
third_party/rust/regex-syntax/Cargo.toml vendored
View file

@ -11,8 +11,9 @@
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "regex-syntax"
version = "0.6.12"
version = "0.6.25"
authors = ["The Rust Project Developers"]
description = "A regular expression parser."
homepage = "https://github.com/rust-lang/regex"

View file

@ -1,6 +1,5 @@
#![feature(test)]
extern crate regex_syntax;
extern crate test;
use regex_syntax::Parser;

25
third_party/rust/regex-syntax/src/ast/mod.rs vendored
View file

@ -6,7 +6,7 @@ use std::cmp::Ordering;
use std::error;
use std::fmt;
pub use ast::visitor::{visit, Visitor};
pub use crate::ast::visitor::{visit, Visitor};
pub mod parse;
pub mod print;
@ -156,6 +156,9 @@ pub enum ErrorKind {
/// `(?i)*`. It is, however, possible to create a repetition operating on
/// an empty sub-expression. For example, `()*` is still considered valid.
RepetitionMissing,
/// The Unicode class is not valid. This typically occurs when a `\p` is
/// followed by something other than a `{`.
UnicodeClassInvalid,
/// When octal support is disabled, this error is produced when an octal
/// escape is used. The octal escape is assumed to be an invocation of
/// a backreference, which is the common case.
@ -176,6 +179,8 @@ pub enum ErrorKind {
}
impl error::Error for Error {
// TODO: Remove this method entirely on the next breaking semver release.
#[allow(deprecated)]
fn description(&self) -> &str {
use self::ErrorKind::*;
match self.kind {
@ -206,6 +211,7 @@ impl error::Error for Error {
RepetitionCountInvalid => "invalid repetition count range",
RepetitionCountUnclosed => "unclosed counted repetition",
RepetitionMissing => "repetition operator missing expression",
UnicodeClassInvalid => "invalid Unicode character class",
UnsupportedBackreference => "backreferences are not supported",
UnsupportedLookAround => "look-around is not supported",
_ => unreachable!(),
@ -214,13 +220,13 @@ impl error::Error for Error {
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
::error::Formatter::from(self).fmt(f)
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
crate::error::Formatter::from(self).fmt(f)
}
}
impl fmt::Display for ErrorKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use self::ErrorKind::*;
match *self {
CaptureLimitExceeded => write!(
@ -293,6 +299,9 @@ impl fmt::Display for ErrorKind {
RepetitionMissing => {
write!(f, "repetition operator missing expression")
}
UnicodeClassInvalid => {
write!(f, "invalid Unicode character class")
}
UnsupportedBackreference => {
write!(f, "backreferences are not supported")
}
@ -319,7 +328,7 @@ pub struct Span {
}
impl fmt::Debug for Span {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Span({:?}, {:?})", self.start, self.end)
}
}
@ -352,7 +361,7 @@ pub struct Position {
}
impl fmt::Debug for Position {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Position(o: {:?}, l: {:?}, c: {:?})",
@ -533,8 +542,8 @@ impl Ast {
/// This implementation uses constant stack space and heap space proportional
/// to the size of the `Ast`.
impl fmt::Display for Ast {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use ast::print::Printer;
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use crate::ast::print::Printer;
Printer::new().print(self, f)
}
}

View file

@ -7,10 +7,10 @@ use std::cell::{Cell, RefCell};
use std::mem;
use std::result;
use ast::{self, Ast, Position, Span};
use either::Either;
use crate::ast::{self, Ast, Position, Span};
use crate::either::Either;
use is_meta_character;
use crate::is_meta_character;
type Result<T> = result::Result<T, ast::Error>;
@ -58,10 +58,10 @@ impl Primitive {
/// then return an error.
fn into_class_set_item<P: Borrow<Parser>>(
self,
p: &ParserI<P>,
p: &ParserI<'_, P>,
) -> Result<ast::ClassSetItem> {
use self::Primitive::*;
use ast::ClassSetItem;
use crate::ast::ClassSetItem;
match self {
Literal(lit) => Ok(ClassSetItem::Literal(lit)),
@ -79,7 +79,7 @@ impl Primitive {
/// dot), then return an error.
fn into_class_literal<P: Borrow<Parser>>(
self,
p: &ParserI<P>,
p: &ParserI<'_, P>,
) -> Result<ast::Literal> {
use self::Primitive::*;
@ -98,12 +98,13 @@ fn is_hex(c: char) -> bool {
/// Returns true if the given character is a valid in a capture group name.
///
/// If `first` is true, then `c` is treated as the first character in the
/// group name (which is not allowed to be a digit).
/// group name (which must be alphabetic or underscore).
fn is_capture_char(c: char, first: bool) -> bool {
c == '_'
|| (!first && c >= '0' && c <= '9')
|| (c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| (!first
&& (('0' <= c && c <= '9') || c == '.' || c == '[' || c == ']'))
|| ('A' <= c && c <= 'Z')
|| ('a' <= c && c <= 'z')
}
/// A builder for a regular expression parser.
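As a point of reference, a minimal sketch of the relaxed capture-name rule above (not part of the vendored diff; assumes regex-syntax 0.6.25 as upgraded here):

    use regex_syntax::ast::parse::Parser;

    fn main() {
        // After the first character, '.', '[' and ']' are now accepted in
        // (?P<...>) group names; a leading digit is still rejected.
        assert!(Parser::new().parse("(?P<a.1>z)").is_ok());
        assert!(Parser::new().parse("(?P<a[1]>z)").is_ok());
        assert!(Parser::new().parse("(?P<1a>z)").is_err());
    }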
@ -2095,6 +2096,12 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
} else {
let start = self.pos();
let c = self.char();
if c == '\\' {
return Err(self.error(
self.span_char(),
ast::ErrorKind::UnicodeClassInvalid,
));
}
self.bump_and_bump_space();
let kind = ast::ClassUnicodeKind::OneLetter(c);
(start, kind)
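A quick sketch of the new `\p` check from the caller's side (illustrative only, using the crate's top-level parser):

    use regex_syntax::Parser;

    fn main() {
        // `\p` followed by a backslash is now reported as an invalid
        // Unicode character class instead of being parsed any further.
        let err = Parser::new().parse(r"\p\{").unwrap_err();
        println!("{}", err);
    }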
@ -2130,7 +2137,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
/// A type that traverses a fully parsed Ast and checks whether its depth
/// exceeds the specified nesting limit. If it does, then an error is returned.
#[derive(Debug)]
struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
struct NestLimiter<'p, 's, P> {
/// The parser that is checking the nest limit.
p: &'p ParserI<'s, P>,
/// The current depth while walking an Ast.
@ -2305,7 +2312,7 @@ mod tests {
use std::ops::Range;
use super::{Parser, ParserBuilder, ParserI, Primitive};
use ast::{self, Ast, Position, Span};
use crate::ast::{self, Ast, Position, Span};
// Our own assert_eq, which has slightly better formatting (but honestly
// still kind of crappy).
@ -2350,21 +2357,24 @@ mod tests {
str.to_string()
}
fn parser(pattern: &str) -> ParserI<Parser> {
fn parser(pattern: &str) -> ParserI<'_, Parser> {
ParserI::new(Parser::new(), pattern)
}
fn parser_octal(pattern: &str) -> ParserI<Parser> {
fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
let parser = ParserBuilder::new().octal(true).build();
ParserI::new(parser, pattern)
}
fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
fn parser_nest_limit(
pattern: &str,
nest_limit: u32,
) -> ParserI<'_, Parser> {
let p = ParserBuilder::new().nest_limit(nest_limit).build();
ParserI::new(p, pattern)
}
fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
let p = ParserBuilder::new().ignore_whitespace(true).build();
ParserI::new(p, pattern)
}
@ -3845,6 +3855,45 @@ bar
}))
);
assert_eq!(
parser("(?P<a_1>z)").parse(),
Ok(Ast::Group(ast::Group {
span: span(0..10),
kind: ast::GroupKind::CaptureName(ast::CaptureName {
span: span(4..7),
name: s("a_1"),
index: 1,
}),
ast: Box::new(lit('z', 8)),
}))
);
assert_eq!(
parser("(?P<a.1>z)").parse(),
Ok(Ast::Group(ast::Group {
span: span(0..10),
kind: ast::GroupKind::CaptureName(ast::CaptureName {
span: span(4..7),
name: s("a.1"),
index: 1,
}),
ast: Box::new(lit('z', 8)),
}))
);
assert_eq!(
parser("(?P<a[1]>z)").parse(),
Ok(Ast::Group(ast::Group {
span: span(0..11),
kind: ast::GroupKind::CaptureName(ast::CaptureName {
span: span(4..8),
name: s("a[1]"),
index: 1,
}),
ast: Box::new(lit('z', 9)),
}))
);
assert_eq!(
parser("(?P<").parse().unwrap_err(),
TestError {
@ -5713,6 +5762,20 @@ bar
],
}))
);
assert_eq!(
parser(r"\p\{").parse().unwrap_err(),
TestError {
span: span(2..3),
kind: ast::ErrorKind::UnicodeClassInvalid,
}
);
assert_eq!(
parser(r"\P\{").parse().unwrap_err(),
TestError {
span: span(2..3),
kind: ast::ErrorKind::UnicodeClassInvalid,
}
);
}
#[test]

View file

@ -4,8 +4,8 @@ This module provides a regular expression printer for `Ast`.
use std::fmt;
use ast::visitor::{self, Visitor};
use ast::{self, Ast};
use crate::ast::visitor::{self, Visitor};
use crate::ast::{self, Ast};
/// A builder for constructing a printer.
///
@ -86,7 +86,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
}
fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
use ast::Class;
use crate::ast::Class;
match *ast {
Ast::Empty(_) => Ok(()),
@ -126,7 +126,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
&mut self,
ast: &ast::ClassSetItem,
) -> Result<(), Self::Err> {
use ast::ClassSetItem::*;
use crate::ast::ClassSetItem::*;
match *ast {
Empty(_) => Ok(()),
@ -155,7 +155,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
impl<'p, W: fmt::Write> Writer<'p, W> {
fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
use ast::GroupKind::*;
use crate::ast::GroupKind::*;
match ast.kind {
CaptureIndex(_) => self.wtr.write_str("("),
CaptureName(ref x) => {
@ -178,7 +178,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
}
fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
use ast::RepetitionKind::*;
use crate::ast::RepetitionKind::*;
match ast.op.kind {
ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
ZeroOrOne => self.wtr.write_str("??"),
@ -200,7 +200,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
&mut self,
ast: &ast::RepetitionRange,
) -> fmt::Result {
use ast::RepetitionRange::*;
use crate::ast::RepetitionRange::*;
match *ast {
Exactly(x) => write!(self.wtr, "{{{}}}", x),
AtLeast(x) => write!(self.wtr, "{{{},}}", x),
@ -209,7 +209,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
}
fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
use ast::LiteralKind::*;
use crate::ast::LiteralKind::*;
match ast.kind {
Verbatim => self.wtr.write_char(ast.c),
@ -256,7 +256,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
}
fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
use ast::AssertionKind::*;
use crate::ast::AssertionKind::*;
match ast.kind {
StartLine => self.wtr.write_str("^"),
EndLine => self.wtr.write_str("$"),
@ -275,7 +275,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
}
fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
use ast::{Flag, FlagsItemKind};
use crate::ast::{Flag, FlagsItemKind};
for item in &ast.items {
match item.kind {
@ -315,7 +315,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
&mut self,
ast: &ast::ClassSetBinaryOpKind,
) -> fmt::Result {
use ast::ClassSetBinaryOpKind::*;
use crate::ast::ClassSetBinaryOpKind::*;
match *ast {
Intersection => self.wtr.write_str("&&"),
Difference => self.wtr.write_str("--"),
@ -324,7 +324,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
}
fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
use ast::ClassPerlKind::*;
use crate::ast::ClassPerlKind::*;
match ast.kind {
Digit if ast.negated => self.wtr.write_str(r"\D"),
Digit => self.wtr.write_str(r"\d"),
@ -336,7 +336,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
}
fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
use ast::ClassAsciiKind::*;
use crate::ast::ClassAsciiKind::*;
match ast.kind {
Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
Alnum => self.wtr.write_str("[:alnum:]"),
@ -370,8 +370,8 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
}
fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
use ast::ClassUnicodeKind::*;
use ast::ClassUnicodeOpKind::*;
use crate::ast::ClassUnicodeKind::*;
use crate::ast::ClassUnicodeOpKind::*;
if ast.negated {
self.wtr.write_str(r"\P")?;
@ -397,7 +397,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
#[cfg(test)]
mod tests {
use super::Printer;
use ast::parse::ParserBuilder;
use crate::ast::parse::ParserBuilder;
fn roundtrip(given: &str) {
roundtrip_with(|b| b, given);

View file

@ -1,6 +1,6 @@
use std::fmt;
use ast::{self, Ast};
use crate::ast::{self, Ast};
/// A trait for visiting an abstract syntax tree (AST) in depth first order.
///
@ -478,7 +478,7 @@ impl<'a> ClassInduct<'a> {
}
impl<'a> fmt::Debug for ClassFrame<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let x = match *self {
ClassFrame::Union { .. } => "Union",
ClassFrame::Binary { .. } => "Binary",
@ -490,7 +490,7 @@ impl<'a> fmt::Debug for ClassFrame<'a> {
}
impl<'a> fmt::Debug for ClassInduct<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let x = match *self {
ClassInduct::Item(it) => match *it {
ast::ClassSetItem::Empty(_) => "Item(Empty)",

14
third_party/rust/regex-syntax/src/error.rs vendored
View file

@ -3,8 +3,8 @@ use std::error;
use std::fmt;
use std::result;
use ast;
use hir;
use crate::ast;
use crate::hir;
/// A type alias for dealing with errors returned by this crate.
pub type Result<T> = result::Result<T, Error>;
@ -40,6 +40,8 @@ impl From<hir::Error> for Error {
}
impl error::Error for Error {
// TODO: Remove this method entirely on the next breaking semver release.
#[allow(deprecated)]
fn description(&self) -> &str {
match *self {
Error::Parse(ref x) => x.description(),
@ -50,7 +52,7 @@ impl error::Error for Error {
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Error::Parse(ref x) => x.fmt(f),
Error::Translate(ref x) => x.fmt(f),
@ -65,7 +67,7 @@ impl fmt::Display for Error {
/// readable format. Most of its complexity is from interspersing notational
/// markers pointing out the position where an error occurred.
#[derive(Debug)]
pub struct Formatter<'e, E: 'e> {
pub struct Formatter<'e, E> {
/// The original regex pattern in which the error occurred.
pattern: &'e str,
/// The error kind. It must impl fmt::Display.
@ -100,7 +102,7 @@ impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
}
impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let spans = Spans::from_formatter(self);
if self.pattern.contains('\n') {
let divider = repeat_char('~', 79);
@ -284,7 +286,7 @@ fn repeat_char(c: char, count: usize) -> String {
#[cfg(test)]
mod tests {
use ast::parse::Parser;
use crate::ast::parse::Parser;
fn assert_panic_message(pattern: &str, expected_msg: &str) -> () {
let result = Parser::new().parse(pattern);

View file

@ -4,7 +4,7 @@ use std::fmt::Debug;
use std::slice;
use std::u8;
use unicode;
use crate::unicode;
// This module contains an *internal* implementation of interval sets.
//
@ -60,7 +60,7 @@ impl<I: Interval> IntervalSet<I> {
/// Return an iterator over all intervals in this set.
///
/// The iterator yields intervals in ascending order.
pub fn iter(&self) -> IntervalSetIter<I> {
pub fn iter(&self) -> IntervalSetIter<'_, I> {
IntervalSetIter(self.ranges.iter())
}
@ -322,7 +322,7 @@ impl<I: Interval> IntervalSet<I> {
/// An iterator over intervals.
#[derive(Debug)]
pub struct IntervalSetIter<'a, I: 'a>(slice::Iter<'a, I>);
pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>);
impl<'a, I> Iterator for IntervalSetIter<'a, I> {
type Item = &'a I;

View file

@ -8,7 +8,7 @@ use std::iter;
use std::mem;
use std::ops;
use hir::{self, Hir, HirKind};
use crate::hir::{self, Hir, HirKind};
/// A set of literal byte strings extracted from a regular expression.
///
@ -838,7 +838,7 @@ fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
}
impl fmt::Debug for Literals {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Literals")
.field("lits", &self.lits)
.field("limit_size", &self.limit_size)
@ -882,7 +882,7 @@ impl PartialOrd for Literal {
}
impl fmt::Debug for Literal {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.is_cut() {
write!(f, "Cut({})", escape_unicode(&self.v))
} else {
@ -977,8 +977,8 @@ mod tests {
use std::fmt;
use super::{escape_bytes, Literal, Literals};
use hir::Hir;
use ParserBuilder;
use crate::hir::Hir;
use crate::ParserBuilder;
// To make test failures easier to read.
#[derive(Debug, Eq, PartialEq)]
@ -1017,7 +1017,7 @@ mod tests {
}
impl fmt::Debug for ULiteral {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.is_cut() {
write!(f, "Cut({})", self.v)
} else {

66
third_party/rust/regex-syntax/src/hir/mod.rs vendored
View file

@ -8,12 +8,12 @@ use std::fmt;
use std::result;
use std::u8;
use ast::Span;
use hir::interval::{Interval, IntervalSet, IntervalSetIter};
use unicode;
use crate::ast::Span;
use crate::hir::interval::{Interval, IntervalSet, IntervalSetIter};
use crate::unicode;
pub use hir::visitor::{visit, Visitor};
pub use unicode::CaseFoldError;
pub use crate::hir::visitor::{visit, Visitor};
pub use crate::unicode::CaseFoldError;
mod interval;
pub mod literal;
@ -91,6 +91,8 @@ pub enum ErrorKind {
}
impl ErrorKind {
// TODO: Remove this method entirely on the next breaking semver release.
#[allow(deprecated)]
fn description(&self) -> &str {
use self::ErrorKind::*;
match *self {
@ -113,19 +115,23 @@ impl ErrorKind {
}
impl error::Error for Error {
// TODO: Remove this method entirely on the next breaking semver release.
#[allow(deprecated)]
fn description(&self) -> &str {
self.kind.description()
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
::error::Formatter::from(self).fmt(f)
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
crate::error::Formatter::from(self).fmt(f)
}
}
impl fmt::Display for ErrorKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// TODO: Remove this on the next breaking semver release.
#[allow(deprecated)]
f.write_str(self.description())
}
}
@ -235,8 +241,8 @@ impl Hir {
info.set_any_anchored_start(false);
info.set_any_anchored_end(false);
info.set_match_empty(true);
info.set_literal(true);
info.set_alternation_literal(true);
info.set_literal(false);
info.set_alternation_literal(false);
Hir { kind: HirKind::Empty, info: info }
}
@ -665,8 +671,8 @@ impl Hir {
/// true when this HIR expression is either itself a `Literal` or a
/// concatenation of only `Literal`s.
///
/// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()`
/// are not (even though that contain sub-expressions that are literals).
/// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()`,
/// `` are not (even though that contain sub-expressions that are literals).
pub fn is_literal(&self) -> bool {
self.info.is_literal()
}
@ -676,8 +682,8 @@ impl Hir {
/// true when this HIR expression is either itself a `Literal` or a
/// concatenation of only `Literal`s or an alternation of only `Literal`s.
///
/// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternaiton
/// literals, but `f+`, `(foo)`, `foo()`
/// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation
/// literals, but `f+`, `(foo)`, `foo()`, ``
/// are not (even though that contain sub-expressions that are literals).
pub fn is_alternation_literal(&self) -> bool {
self.info.is_alternation_literal()
@ -721,8 +727,8 @@ impl HirKind {
/// This implementation uses constant stack space and heap space proportional
/// to the size of the `Hir`.
impl fmt::Display for Hir {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use hir::print::Printer;
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use crate::hir::print::Printer;
Printer::new().print(self, f)
}
}
@ -853,7 +859,7 @@ impl ClassUnicode {
/// Return an iterator over all ranges in this class.
///
/// The iterator yields ranges in ascending order.
pub fn iter(&self) -> ClassUnicodeIter {
pub fn iter(&self) -> ClassUnicodeIter<'_> {
ClassUnicodeIter(self.set.iter())
}
@ -886,14 +892,11 @@ impl ClassUnicode {
/// this class consists of the range `a-z`, then applying case folding will
/// result in the class containing both the ranges `a-z` and `A-Z`.
///
/// # Panics
/// # Error
///
/// This routine panics when the case mapping data necessary for this
/// routine to complete is unavailable. This occurs when the `unicode-case`
/// feature is not enabled.
///
/// Callers should prefer using `try_case_fold_simple` instead, which will
/// return an error instead of panicking.
/// This routine returns an error when the case mapping data necessary
/// for this routine to complete is unavailable. This occurs when the
/// `unicode-case` feature is not enabled.
pub fn try_case_fold_simple(
&mut self,
) -> result::Result<(), CaseFoldError> {
@ -935,6 +938,13 @@ impl ClassUnicode {
pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
self.set.symmetric_difference(&other.set);
}
/// Returns true if and only if this character class will either match
/// nothing or only ASCII bytes. Stated differently, this returns false
/// if and only if this class contains a non-ASCII codepoint.
pub fn is_all_ascii(&self) -> bool {
self.set.intervals().last().map_or(true, |r| r.end <= '\x7F')
}
}
/// An iterator over all ranges in a Unicode character class.
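A small usage sketch for the two `ClassUnicode` pieces touched above, the error-returning `try_case_fold_simple` and the new `is_all_ascii` helper (illustrative only; assumes default crate features):

    use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange};

    fn main() {
        // try_case_fold_simple returns a CaseFoldError (instead of panicking)
        // only when the unicode-case feature is disabled.
        let mut cls = ClassUnicode::new(vec![ClassUnicodeRange::new('a', 'z')]);
        cls.try_case_fold_simple().unwrap();

        // is_all_ascii is true only when every range stays at or below 0x7F.
        let digits = ClassUnicode::new(vec![ClassUnicodeRange::new('0', '9')]);
        assert!(digits.is_all_ascii());
        let wide = ClassUnicode::new(vec![ClassUnicodeRange::new('a', 'λ')]);
        assert!(!wide.is_all_ascii());
    }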
@ -962,7 +972,7 @@ pub struct ClassUnicodeRange {
}
impl fmt::Debug for ClassUnicodeRange {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let start = if !self.start.is_whitespace() && !self.start.is_control()
{
self.start.to_string()
@ -1092,7 +1102,7 @@ impl ClassBytes {
/// Return an iterator over all ranges in this class.
///
/// The iterator yields ranges in ascending order.
pub fn iter(&self) -> ClassBytesIter {
pub fn iter(&self) -> ClassBytesIter<'_> {
ClassBytesIter(self.set.iter())
}
@ -1248,7 +1258,7 @@ impl ClassBytesRange {
}
impl fmt::Debug for ClassBytesRange {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut debug = f.debug_struct("ClassBytesRange");
if self.start <= 0x7F {
debug.field("start", &(self.start as char));
@ -1486,7 +1496,7 @@ macro_rules! define_bool {
self.bools &= !(1 << $bit);
}
}
}
};
}
impl HirInfo {

View file

@ -4,9 +4,9 @@ This module provides a regular expression printer for `Hir`.
use std::fmt;
use hir::visitor::{self, Visitor};
use hir::{self, Hir, HirKind};
use is_meta_character;
use crate::hir::visitor::{self, Visitor};
use crate::hir::{self, Hir, HirKind};
use crate::is_meta_character;
/// A builder for constructing a printer.
///
@ -239,7 +239,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> {
#[cfg(test)]
mod tests {
use super::Printer;
use ParserBuilder;
use crate::ParserBuilder;
fn roundtrip(given: &str, expected: &str) {
roundtrip_with(|b| b, given, expected);

View file

@ -5,9 +5,9 @@ Defines a translator that converts an `Ast` to an `Hir`.
use std::cell::{Cell, RefCell};
use std::result;
use ast::{self, Ast, Span, Visitor};
use hir::{self, Error, ErrorKind, Hir};
use unicode::{self, ClassQuery};
use crate::ast::{self, Ast, Span, Visitor};
use crate::hir::{self, Error, ErrorKind, Hir};
use crate::unicode::{self, ClassQuery};
type Result<T> = result::Result<T, Error>;
@ -159,18 +159,19 @@ enum HirFrame {
/// indicated by parentheses (including non-capturing groups). It is popped
/// upon leaving a group.
Group {
/// The old active flags, if any, when this group was opened.
/// The old active flags when this group was opened.
///
/// If this group sets flags, then the new active flags are set to the
/// result of merging the old flags with the flags introduced by this
/// group.
/// group. If the group doesn't set any flags, then this is simply
/// equivalent to whatever flags were set when the group was opened.
///
/// When this group is popped, the active flags should be restored to
/// the flags set here.
///
/// The "active" flags correspond to whatever flags are set in the
/// Translator.
old_flags: Option<Flags>,
old_flags: Flags,
},
/// This is pushed whenever a concatenation is observed. After visiting
/// every sub-expression in the concatenation, the translator's stack is
@ -219,8 +220,8 @@ impl HirFrame {
/// Assert that the current stack frame is a group indicator and return
/// its corresponding flags (the flags that were active at the time the
/// group was entered) if they exist.
fn unwrap_group(self) -> Option<Flags> {
/// group was entered).
fn unwrap_group(self) -> Flags {
match self {
HirFrame::Group { old_flags } => old_flags,
_ => {
@ -252,8 +253,11 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
}
}
Ast::Group(ref x) => {
let old_flags = x.flags().map(|ast| self.set_flags(ast));
self.push(HirFrame::Group { old_flags: old_flags });
let old_flags = x
.flags()
.map(|ast| self.set_flags(ast))
.unwrap_or_else(|| self.flags());
self.push(HirFrame::Group { old_flags });
}
Ast::Concat(ref x) if x.asts.is_empty() => {}
Ast::Concat(_) => {
@ -318,7 +322,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
ast.negated,
&mut cls,
)?;
if cls.iter().next().is_none() {
if cls.ranges().is_empty() {
return Err(self.error(
ast.span,
ErrorKind::EmptyClassNotAllowed,
@ -333,7 +337,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
ast.negated,
&mut cls,
)?;
if cls.iter().next().is_none() {
if cls.ranges().is_empty() {
return Err(self.error(
ast.span,
ErrorKind::EmptyClassNotAllowed,
@ -350,9 +354,8 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
}
Ast::Group(ref x) => {
let expr = self.pop().unwrap().unwrap_expr();
if let Some(flags) = self.pop().unwrap().unwrap_group() {
self.trans().flags.set(flags);
}
let old_flags = self.pop().unwrap().unwrap_group();
self.trans().flags.set(old_flags);
self.push(HirFrame::Expr(self.hir_group(x, expr)));
}
Ast::Concat(_) => {
@ -530,7 +533,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
&mut self,
op: &ast::ClassSetBinaryOp,
) -> Result<()> {
use ast::ClassSetBinaryOpKind::*;
use crate::ast::ClassSetBinaryOpKind::*;
if self.flags().unicode() {
let mut rhs = self.pop().unwrap().unwrap_class_unicode();
@ -816,7 +819,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
&self,
ast_class: &ast::ClassUnicode,
) -> Result<hir::ClassUnicode> {
use ast::ClassUnicodeKind::*;
use crate::ast::ClassUnicodeKind::*;
if !self.flags().unicode() {
return Err(
@ -841,6 +844,11 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
ast_class.negated,
class,
)?;
if class.ranges().is_empty() {
let err = self
.error(ast_class.span, ErrorKind::EmptyClassNotAllowed);
return Err(err);
}
}
result
}
@ -849,7 +857,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
&self,
ast_class: &ast::ClassPerl,
) -> Result<hir::ClassUnicode> {
use ast::ClassPerlKind::*;
use crate::ast::ClassPerlKind::*;
assert!(self.flags().unicode());
let result = match ast_class.kind {
@ -871,7 +879,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
&self,
ast_class: &ast::ClassPerl,
) -> hir::ClassBytes {
use ast::ClassPerlKind::*;
use crate::ast::ClassPerlKind::*;
assert!(!self.flags().unicode());
let mut class = match ast_class.kind {
@ -1069,7 +1077,7 @@ fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
}
fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
use ast::ClassAsciiKind::*;
use crate::ast::ClassAsciiKind::*;
match *kind {
Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
Alpha => &[('A', 'Z'), ('a', 'z')],
@ -1097,10 +1105,10 @@ fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
#[cfg(test)]
mod tests {
use ast::parse::ParserBuilder;
use ast::{self, Ast, Position, Span};
use hir::{self, Hir, HirKind};
use unicode::{self, ClassQuery};
use crate::ast::parse::ParserBuilder;
use crate::ast::{self, Ast, Position, Span};
use crate::hir::{self, Hir, HirKind};
use crate::unicode::{self, ClassQuery};
use super::{ascii_class, TranslatorBuilder};
@ -1248,7 +1256,7 @@ mod tests {
}
#[allow(dead_code)]
fn hir_uclass_query(query: ClassQuery) -> Hir {
fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
}
@ -1307,7 +1315,7 @@ mod tests {
#[allow(dead_code)]
fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
use hir::Class::{Bytes, Unicode};
use crate::hir::Class::{Bytes, Unicode};
match (expr1.into_kind(), expr2.into_kind()) {
(HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
@ -1324,7 +1332,7 @@ mod tests {
#[allow(dead_code)]
fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
use hir::Class::{Bytes, Unicode};
use crate::hir::Class::{Bytes, Unicode};
match (expr1.into_kind(), expr2.into_kind()) {
(HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
@ -1641,6 +1649,20 @@ mod tests {
hir_lit("β"),
])
);
assert_eq!(
t("(?:(?i-u)a)b"),
hir_cat(vec![
hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
hir_lit("b"),
])
);
assert_eq!(
t("((?i-u)a)b"),
hir_cat(vec![
hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
hir_lit("b"),
])
);
#[cfg(feature = "unicode-case")]
assert_eq!(
t("(?i)(?-i:a)a"),
@ -2300,6 +2322,21 @@ mod tests {
);
}
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_any_empty() {
assert_eq!(
t_err(r"\P{any}"),
TestError {
kind: hir::ErrorKind::EmptyClassNotAllowed,
span: Span::new(
Position::new(0, 1, 1),
Position::new(7, 1, 8)
),
}
);
}
#[test]
#[cfg(not(feature = "unicode-age"))]
fn class_unicode_age_disabled() {
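A short sketch of the new empty-class rejection exercised by the test above (assumes the default unicode features):

    use regex_syntax::Parser;

    fn main() {
        // `\P{any}` can never match anything, so it is now rejected as an
        // empty character class; `\p{any}` still parses.
        assert!(Parser::new().parse(r"\P{any}").is_err());
        assert!(Parser::new().parse(r"\p{any}").is_ok());
    }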
@ -3088,13 +3125,13 @@ mod tests {
#[test]
fn analysis_is_literal() {
// Positive examples.
assert!(t(r"").is_literal());
assert!(t(r"a").is_literal());
assert!(t(r"ab").is_literal());
assert!(t(r"abc").is_literal());
assert!(t(r"(?m)abc").is_literal());
// Negative examples.
assert!(!t(r"").is_literal());
assert!(!t(r"^").is_literal());
assert!(!t(r"a|b").is_literal());
assert!(!t(r"(a)").is_literal());
@ -3107,7 +3144,6 @@ mod tests {
#[test]
fn analysis_is_alternation_literal() {
// Positive examples.
assert!(t(r"").is_alternation_literal());
assert!(t(r"a").is_alternation_literal());
assert!(t(r"ab").is_alternation_literal());
assert!(t(r"abc").is_alternation_literal());
@ -3118,6 +3154,7 @@ mod tests {
assert!(t(r"foo|bar|baz").is_alternation_literal());
// Negative examples.
assert!(!t(r"").is_alternation_literal());
assert!(!t(r"^").is_alternation_literal());
assert!(!t(r"(a)").is_alternation_literal());
assert!(!t(r"a+").is_alternation_literal());

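A sketch of the changed literal analysis (illustrative only): the empty pattern no longer reports itself as a literal.

    use regex_syntax::Parser;

    fn main() {
        assert!(!Parser::new().parse("").unwrap().is_literal());
        assert!(Parser::new().parse("abc").unwrap().is_literal());
        assert!(Parser::new().parse("foo|bar").unwrap().is_alternation_literal());
    }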
View file

@ -1,4 +1,4 @@
use hir::{self, Hir, HirKind};
use crate::hir::{self, Hir, HirKind};
/// A trait for visiting the high-level IR (HIR) in depth first order.
///

16
third_party/rust/regex-syntax/src/lib.rs vendored
View file

@ -155,11 +155,12 @@ The following features are available:
*/
#![deny(missing_docs)]
#![warn(missing_debug_implementations)]
#![forbid(unsafe_code)]
pub use error::{Error, Result};
pub use parser::{Parser, ParserBuilder};
pub use unicode::UnicodeWordError;
pub use crate::error::{Error, Result};
pub use crate::parser::{Parser, ParserBuilder};
pub use crate::unicode::UnicodeWordError;
pub mod ast;
mod either;
@ -175,7 +176,7 @@ pub mod utf8;
/// The string returned may be safely used as a literal in a regular
/// expression.
pub fn escape(text: &str) -> String {
let mut quoted = String::with_capacity(text.len());
let mut quoted = String::new();
escape_into(text, &mut quoted);
quoted
}
@ -185,6 +186,7 @@ pub fn escape(text: &str) -> String {
/// This will append escape characters into the given buffer. The characters
/// that are appended are safe to use as a literal in a regular expression.
pub fn escape_into(text: &str, buf: &mut String) {
buf.reserve(text.len());
for c in text.chars() {
if is_meta_character(c) {
buf.push('\\');
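Usage sketch for the two escape helpers touched here; behavior is unchanged apart from where buffer capacity is reserved (illustrative only):

    fn main() {
        // escape() returns a quoted copy; escape_into() appends to an
        // existing buffer and now reserves capacity for the input up front.
        assert_eq!(regex_syntax::escape("a.b*c"), r"a\.b\*c");
        let mut buf = String::from("prefix: ");
        regex_syntax::escape_into("1+1", &mut buf);
        assert_eq!(buf, r"prefix: 1\+1");
    }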
@ -197,7 +199,7 @@ pub fn escape_into(text: &str, buf: &mut String) {
///
/// These are the only characters that are allowed to be escaped, with one
/// exception: an ASCII space character may be escaped when extended mode (with
/// the `x` flag) is enabld. In particular, `is_meta_character(' ')` returns
/// the `x` flag) is enabled. In particular, `is_meta_character(' ')` returns
/// `false`.
///
/// Note that the set of characters for which this function returns `true` or
@ -214,7 +216,7 @@ pub fn is_meta_character(c: char) -> bool {
/// character.
///
/// A Unicode word character is defined by
/// [UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties).
/// [UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties).
/// In particular, a character
/// is considered a word character if it is in either of the `Alphabetic` or
/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark`
@ -234,7 +236,7 @@ pub fn is_word_character(c: char) -> bool {
/// character.
///
/// A Unicode word character is defined by
/// [UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties).
/// [UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties).
/// In particular, a character
/// is considered a word character if it is in either of the `Alphabetic` or
/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark`

6
third_party/rust/regex-syntax/src/parser.rs vendored
View file

@ -1,7 +1,7 @@
use ast;
use hir;
use crate::ast;
use crate::hir;
use Result;
use crate::Result;
/// A builder for a regular expression parser.
///

61
third_party/rust/regex-syntax/src/unicode.rs vendored
View file

@ -2,7 +2,7 @@ use std::error;
use std::fmt;
use std::result;
use hir;
use crate::hir;
/// A type alias for errors specific to Unicode handling of classes.
pub type Result<T> = result::Result<T, Error>;
@ -38,7 +38,7 @@ pub struct CaseFoldError(());
impl error::Error for CaseFoldError {}
impl fmt::Display for CaseFoldError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Unicode-aware case folding is not available \
@ -58,7 +58,7 @@ pub struct UnicodeWordError(());
impl error::Error for UnicodeWordError {}
impl fmt::Display for UnicodeWordError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Unicode-aware \\w class is not available \
@ -95,7 +95,7 @@ pub fn simple_fold(
c: char,
) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
{
use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
Ok(CASE_FOLDING_SIMPLE
.binary_search_by_key(&c, |&(c1, _)| c1)
@ -130,8 +130,8 @@ pub fn contains_simple_case_mapping(
#[cfg(feature = "unicode-case")]
fn imp(start: char, end: char) -> FoldResult<bool> {
use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
use std::cmp::Ordering;
use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
assert!(start <= end);
Ok(CASE_FOLDING_SIMPLE
@ -237,8 +237,16 @@ impl<'a> ClassQuery<'a> {
fn canonical_binary(&self, name: &str) -> Result<CanonicalClassQuery> {
let norm = symbolic_name_normalize(name);
if let Some(canon) = canonical_prop(&norm)? {
return Ok(CanonicalClassQuery::Binary(canon));
// This is a special case where 'cf' refers to the 'Format' general
// category, but where the 'cf' abbreviation is also an abbreviation
// for the 'Case_Folding' property. But we want to treat it as
// a general category. (Currently, we don't even support the
// 'Case_Folding' property. But if we do in the future, users will be
// required to spell it out.)
if norm != "cf" {
if let Some(canon) = canonical_prop(&norm)? {
return Ok(CanonicalClassQuery::Binary(canon));
}
}
if let Some(canon) = canonical_gencat(&norm)? {
return Ok(CanonicalClassQuery::GeneralCategory(canon));
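Sketch of what the `cf` special case means for callers (assumes the unicode-gencat feature, which is enabled by default; not part of the diff):

    use regex_syntax::Parser;

    fn main() {
        // `cf` resolves to the `Format` general category rather than being
        // treated as an abbreviation of the unsupported Case_Folding property.
        assert!(Parser::new().parse(r"\p{cf}").is_ok());
    }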
@ -277,7 +285,7 @@ enum CanonicalClassQuery {
/// Looks up a Unicode class given a query. If one doesn't exist, then
/// `None` is returned.
pub fn class<'a>(query: ClassQuery<'a>) -> Result<hir::ClassUnicode> {
pub fn class(query: ClassQuery<'_>) -> Result<hir::ClassUnicode> {
use self::CanonicalClassQuery::*;
match query.canonicalize()? {
@ -322,7 +330,7 @@ pub fn perl_word() -> Result<hir::ClassUnicode> {
#[cfg(feature = "unicode-perl")]
fn imp() -> Result<hir::ClassUnicode> {
use unicode_tables::perl_word::PERL_WORD;
use crate::unicode_tables::perl_word::PERL_WORD;
Ok(hir_class(PERL_WORD))
}
@ -340,13 +348,13 @@ pub fn perl_space() -> Result<hir::ClassUnicode> {
#[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))]
fn imp() -> Result<hir::ClassUnicode> {
use unicode_tables::perl_space::WHITE_SPACE;
use crate::unicode_tables::perl_space::WHITE_SPACE;
Ok(hir_class(WHITE_SPACE))
}
#[cfg(feature = "unicode-bool")]
fn imp() -> Result<hir::ClassUnicode> {
use unicode_tables::property_bool::WHITE_SPACE;
use crate::unicode_tables::property_bool::WHITE_SPACE;
Ok(hir_class(WHITE_SPACE))
}
@ -364,13 +372,13 @@ pub fn perl_digit() -> Result<hir::ClassUnicode> {
#[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))]
fn imp() -> Result<hir::ClassUnicode> {
use unicode_tables::perl_decimal::DECIMAL_NUMBER;
use crate::unicode_tables::perl_decimal::DECIMAL_NUMBER;
Ok(hir_class(DECIMAL_NUMBER))
}
#[cfg(feature = "unicode-gencat")]
fn imp() -> Result<hir::ClassUnicode> {
use unicode_tables::general_category::DECIMAL_NUMBER;
use crate::unicode_tables::general_category::DECIMAL_NUMBER;
Ok(hir_class(DECIMAL_NUMBER))
}
@ -397,9 +405,9 @@ pub fn is_word_character(c: char) -> result::Result<bool, UnicodeWordError> {
#[cfg(feature = "unicode-perl")]
fn imp(c: char) -> result::Result<bool, UnicodeWordError> {
use is_word_byte;
use crate::is_word_byte;
use crate::unicode_tables::perl_word::PERL_WORD;
use std::cmp::Ordering;
use unicode_tables::perl_word::PERL_WORD;
if c <= 0x7F as char && is_word_byte(c as u8) {
return Ok(true);
@ -474,7 +482,7 @@ fn canonical_prop(normalized_name: &str) -> Result<Option<&'static str>> {
feature = "unicode-segment",
))]
fn imp(name: &str) -> Result<Option<&'static str>> {
use unicode_tables::property_names::PROPERTY_NAMES;
use crate::unicode_tables::property_names::PROPERTY_NAMES;
Ok(PROPERTY_NAMES
.binary_search_by_key(&name, |&(n, _)| n)
@ -531,7 +539,7 @@ fn property_values(
feature = "unicode-segment",
))]
fn imp(name: &'static str) -> Result<Option<PropertyValues>> {
use unicode_tables::property_values::PROPERTY_VALUES;
use crate::unicode_tables::property_values::PROPERTY_VALUES;
Ok(PROPERTY_VALUES
.binary_search_by_key(&name, |&(n, _)| n)
@ -570,7 +578,7 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
#[cfg(feature = "unicode-age")]
fn imp(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
use unicode_tables::age;
use crate::unicode_tables::age;
const AGES: &'static [(&'static str, Range)] = &[
("V1_1", age::V1_1),
@ -595,6 +603,7 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
("V11_0", age::V11_0),
("V12_0", age::V12_0),
("V12_1", age::V12_1),
("V13_0", age::V13_0),
];
assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
@ -622,7 +631,7 @@ fn gencat(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
#[cfg(feature = "unicode-gencat")]
fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
use unicode_tables::general_category::BY_NAME;
use crate::unicode_tables::general_category::BY_NAME;
match name {
"ASCII" => Ok(hir_class(&[('\0', '\x7F')])),
"Any" => Ok(hir_class(&[('\0', '\u{10FFFF}')])),
@ -657,7 +666,7 @@ fn script(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
#[cfg(feature = "unicode-script")]
fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
use unicode_tables::script::BY_NAME;
use crate::unicode_tables::script::BY_NAME;
property_set(BY_NAME, name)
.map(hir_class)
.ok_or(Error::PropertyValueNotFound)
@ -682,7 +691,7 @@ fn script_extension(
#[cfg(feature = "unicode-script")]
fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
use unicode_tables::script_extension::BY_NAME;
use crate::unicode_tables::script_extension::BY_NAME;
property_set(BY_NAME, name)
.map(hir_class)
.ok_or(Error::PropertyValueNotFound)
@ -706,7 +715,7 @@ fn bool_property(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
#[cfg(feature = "unicode-bool")]
fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
use unicode_tables::property_bool::BY_NAME;
use crate::unicode_tables::property_bool::BY_NAME;
property_set(BY_NAME, name)
.map(hir_class)
.ok_or(Error::PropertyNotFound)
@ -734,7 +743,7 @@ fn gcb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
#[cfg(feature = "unicode-segment")]
fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
use unicode_tables::grapheme_cluster_break::BY_NAME;
use crate::unicode_tables::grapheme_cluster_break::BY_NAME;
property_set(BY_NAME, name)
.map(hir_class)
.ok_or(Error::PropertyValueNotFound)
@ -758,7 +767,7 @@ fn wb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
#[cfg(feature = "unicode-segment")]
fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
use unicode_tables::word_break::BY_NAME;
use crate::unicode_tables::word_break::BY_NAME;
property_set(BY_NAME, name)
.map(hir_class)
.ok_or(Error::PropertyValueNotFound)
@ -782,7 +791,7 @@ fn sb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
#[cfg(feature = "unicode-segment")]
fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
use unicode_tables::sentence_break::BY_NAME;
use crate::unicode_tables::sentence_break::BY_NAME;
property_set(BY_NAME, name)
.map(hir_class)
.ok_or(Error::PropertyValueNotFound)
@ -814,7 +823,7 @@ fn symbolic_name_normalize(x: &str) -> String {
/// The slice returned is guaranteed to be valid UTF-8 for all possible values
/// of `slice`.
///
/// See: http://unicode.org/reports/tr44/#UAX44-LM3
/// See: https://unicode.org/reports/tr44/#UAX44-LM3
fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] {
// I couldn't find a place in the standard that specified that property
// names/aliases had a particular structure (unlike character names), but

View file

@ -1,14 +1,17 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate age /tmp/ucd/12.1.0/ --chars
// ucd-generate age ucd-13.0.0 --chars
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("V10_0", V10_0),
("V11_0", V11_0),
("V12_0", V12_0),
("V12_1", V12_1),
("V13_0", V13_0),
("V1_1", V1_1),
("V2_0", V2_0),
("V2_1", V2_1),
@ -135,69 +138,131 @@ pub const V11_0: &'static [(char, char)] = &[
];
pub const V12_0: &'static [(char, char)] = &[
('\u{c77}', '\u{c77}'),
('\u{e86}', '\u{e86}'),
('\u{e89}', '\u{e89}'),
('\u{e8c}', '\u{e8c}'),
('\u{e8e}', '\u{e93}'),
('\u{e98}', '\u{e98}'),
('\u{ea0}', '\u{ea0}'),
('\u{ea8}', '\u{ea9}'),
('\u{eac}', '\u{eac}'),
('౷', '౷'),
('ຆ', 'ຆ'),
('ຉ', 'ຉ'),
('ຌ', 'ຌ'),
('ຎ', 'ຓ'),
('ຘ', 'ຘ'),
('ຠ', 'ຠ'),
('ຨ', 'ຩ'),
('ຬ', 'ຬ'),
('\u{eba}', '\u{eba}'),
('\u{1cfa}', '\u{1cfa}'),
('\u{2bc9}', '\u{2bc9}'),
('\u{2bff}', '\u{2bff}'),
('\u{2e4f}', '\u{2e4f}'),
('\u{a7ba}', '\u{a7bf}'),
('\u{a7c2}', '\u{a7c6}'),
('\u{ab66}', '\u{ab67}'),
('\u{10fe0}', '\u{10ff6}'),
('\u{1145f}', '\u{1145f}'),
('\u{116b8}', '\u{116b8}'),
('\u{119a0}', '\u{119a7}'),
('\u{119aa}', '\u{119d7}'),
('\u{119da}', '\u{119e4}'),
('\u{11a84}', '\u{11a85}'),
('\u{11fc0}', '\u{11ff1}'),
('\u{11fff}', '\u{11fff}'),
('ᳺ', 'ᳺ'),
('⯉', '⯉'),
('⯿', '⯿'),
('⹏', '⹏'),
('Ꞻ', 'ꞿ'),
('Ꟃ', 'Ᶎ'),
('ꭦ', 'ꭧ'),
('𐿠', '𐿶'),
('𑑟', '𑑟'),
('𑚸', '𑚸'),
('𑦠', '𑦧'),
('𑦪', '\u{119d7}'),
('\u{119da}', '𑧤'),
('𑪄', '𑪅'),
('𑿀', '𑿱'),
('𑿿', '𑿿'),
('\u{13430}', '\u{13438}'),
('\u{16f45}', '\u{16f4a}'),
('𖽅', '𖽊'),
('\u{16f4f}', '\u{16f4f}'),
('\u{16f7f}', '\u{16f87}'),
('\u{16fe2}', '\u{16fe3}'),
('\u{187f2}', '\u{187f7}'),
('\u{1b150}', '\u{1b152}'),
('\u{1b164}', '\u{1b167}'),
('\u{1e100}', '\u{1e12c}'),
('\u{1e130}', '\u{1e13d}'),
('\u{1e140}', '\u{1e149}'),
('\u{1e14e}', '\u{1e14f}'),
('\u{1e2c0}', '\u{1e2f9}'),
('\u{1e2ff}', '\u{1e2ff}'),
('\u{1e94b}', '\u{1e94b}'),
('\u{1ed01}', '\u{1ed3d}'),
('\u{1f16c}', '\u{1f16c}'),
('\u{1f6d5}', '\u{1f6d5}'),
('\u{1f6fa}', '\u{1f6fa}'),
('\u{1f7e0}', '\u{1f7eb}'),
('\u{1f90d}', '\u{1f90f}'),
('\u{1f93f}', '\u{1f93f}'),
('\u{1f971}', '\u{1f971}'),
('\u{1f97b}', '\u{1f97b}'),
('\u{1f9a5}', '\u{1f9aa}'),
('\u{1f9ae}', '\u{1f9af}'),
('\u{1f9ba}', '\u{1f9bf}'),
('\u{1f9c3}', '\u{1f9ca}'),
('\u{1f9cd}', '\u{1f9cf}'),
('\u{1fa00}', '\u{1fa53}'),
('\u{1fa70}', '\u{1fa73}'),
('\u{1fa78}', '\u{1fa7a}'),
('\u{1fa80}', '\u{1fa82}'),
('\u{1fa90}', '\u{1fa95}'),
('𖽿', '𖾇'),
('𖿢', '𖿣'),
('𘟲', '𘟷'),
('𛅐', '𛅒'),
('𛅤', '𛅧'),
('𞄀', '𞄬'),
('\u{1e130}', '𞄽'),
('𞅀', '𞅉'),
('𞅎', '𞅏'),
('𞋀', '𞋹'),
('𞋿', '𞋿'),
('𞥋', '𞥋'),
('𞴁', '𞴽'),
('🅬', '🅬'),
('🛕', '🛕'),
('🛺', '🛺'),
('🟠', '🟫'),
('🤍', '🤏'),
('🤿', '🤿'),
('🥱', '🥱'),
('🥻', '🥻'),
('🦥', '🦪'),
('🦮', '🦯'),
('🦺', '🦿'),
('🧃', '🧊'),
('🧍', '🧏'),
('🨀', '🩓'),
('🩰', '🩳'),
('🩸', '🩺'),
('🪀', '🪂'),
('🪐', '🪕'),
];
pub const V12_1: &'static [(char, char)] = &[('\u{32ff}', '\u{32ff}')];
pub const V12_1: &'static [(char, char)] = &[('㋿', '㋿')];
pub const V13_0: &'static [(char, char)] = &[
('\u{8be}', '\u{8c7}'),
('\u{b55}', '\u{b55}'),
('\u{d04}', '\u{d04}'),
('\u{d81}', '\u{d81}'),
('\u{1abf}', '\u{1ac0}'),
('\u{2b97}', '\u{2b97}'),
('\u{2e50}', '\u{2e52}'),
('\u{31bb}', '\u{31bf}'),
('\u{4db6}', '\u{4dbf}'),
('\u{9ff0}', '\u{9ffc}'),
('\u{a7c7}', '\u{a7ca}'),
('\u{a7f5}', '\u{a7f6}'),
('\u{a82c}', '\u{a82c}'),
('\u{ab68}', '\u{ab6b}'),
('\u{1019c}', '\u{1019c}'),
('\u{10e80}', '\u{10ea9}'),
('\u{10eab}', '\u{10ead}'),
('\u{10eb0}', '\u{10eb1}'),
('\u{10fb0}', '\u{10fcb}'),
('\u{11147}', '\u{11147}'),
('\u{111ce}', '\u{111cf}'),
('\u{1145a}', '\u{1145a}'),
('\u{11460}', '\u{11461}'),
('\u{11900}', '\u{11906}'),
('\u{11909}', '\u{11909}'),
('\u{1190c}', '\u{11913}'),
('\u{11915}', '\u{11916}'),
('\u{11918}', '\u{11935}'),
('\u{11937}', '\u{11938}'),
('\u{1193b}', '\u{11946}'),
('\u{11950}', '\u{11959}'),
('\u{11fb0}', '\u{11fb0}'),
('\u{16fe4}', '\u{16fe4}'),
('\u{16ff0}', '\u{16ff1}'),
('\u{18af3}', '\u{18cd5}'),
('\u{18d00}', '\u{18d08}'),
('\u{1f10d}', '\u{1f10f}'),
('\u{1f16d}', '\u{1f16f}'),
('\u{1f1ad}', '\u{1f1ad}'),
('\u{1f6d6}', '\u{1f6d7}'),
('\u{1f6fb}', '\u{1f6fc}'),
('\u{1f8b0}', '\u{1f8b1}'),
('\u{1f90c}', '\u{1f90c}'),
('\u{1f972}', '\u{1f972}'),
('\u{1f977}', '\u{1f978}'),
('\u{1f9a3}', '\u{1f9a4}'),
('\u{1f9ab}', '\u{1f9ad}'),
('\u{1f9cb}', '\u{1f9cb}'),
('\u{1fa74}', '\u{1fa74}'),
('\u{1fa83}', '\u{1fa86}'),
('\u{1fa96}', '\u{1faa8}'),
('\u{1fab0}', '\u{1fab6}'),
('\u{1fac0}', '\u{1fac2}'),
('\u{1fad0}', '\u{1fad6}'),
('\u{1fb00}', '\u{1fb92}'),
('\u{1fb94}', '\u{1fbca}'),
('\u{1fbf0}', '\u{1fbf9}'),
('\u{2a6d7}', '\u{2a6dd}'),
('\u{30000}', '\u{3134a}'),
];
pub const V1_1: &'static [(char, char)] = &[
('\u{0}', 'ǵ'),

View file

@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate case-folding-simple /tmp/ucd/12.1.0/ --chars --all-pairs
// ucd-generate case-folding-simple ucd-13.0.0 --chars --all-pairs
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
('A', &['a']),
@ -459,7 +461,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
('ɵ', &['Ɵ']),
('ɽ', &['Ɽ']),
('ʀ', &['Ʀ']),
('ʂ', &['\u{a7c5}']),
('ʂ', &['Ʂ']),
('ʃ', &['Ʃ']),
('ʇ', &['Ʇ']),
('ʈ', &['Ʈ']),
@ -1199,7 +1201,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
('Ჿ', &['ჿ']),
('ᵹ', &['Ᵹ']),
('ᵽ', &['Ᵽ']),
('ᶎ', &['\u{a7c6}']),
('ᶎ', &['Ᶎ']),
('Ḁ', &['ḁ']),
('ḁ', &['Ḁ']),
('Ḃ', &['ḃ']),
@ -2167,7 +2169,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
('ꞑ', &['Ꞑ']),
('Ꞓ', &['ꞓ']),
('ꞓ', &['Ꞓ']),
('ꞔ', &['\u{a7c4}']),
('ꞔ', &['Ꞔ']),
('Ꞗ', &['ꞗ']),
('ꞗ', &['Ꞗ']),
('Ꞙ', &['ꞙ']),
@ -2203,17 +2205,23 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
('ꞷ', &['Ꞷ']),
('Ꞹ', &['ꞹ']),
('ꞹ', &['Ꞹ']),
('\u{a7ba}', &['\u{a7bb}']),
('\u{a7bb}', &['\u{a7ba}']),
('\u{a7bc}', &['\u{a7bd}']),
('\u{a7bd}', &['\u{a7bc}']),
('\u{a7be}', &['\u{a7bf}']),
('\u{a7bf}', &['\u{a7be}']),
('\u{a7c2}', &['\u{a7c3}']),
('\u{a7c3}', &['\u{a7c2}']),
('\u{a7c4}', &['ꞔ']),
('\u{a7c5}', &['ʂ']),
('\u{a7c6}', &['ᶎ']),
('Ꞻ', &['ꞻ']),
('ꞻ', &['Ꞻ']),
('Ꞽ', &['ꞽ']),
('ꞽ', &['Ꞽ']),
('Ꞿ', &['ꞿ']),
('ꞿ', &['Ꞿ']),
('Ꟃ', &['ꟃ']),
('ꟃ', &['Ꟃ']),
('Ꞔ', &['ꞔ']),
('Ʂ', &['ʂ']),
('Ᶎ', &['ᶎ']),
('\u{a7c7}', &['\u{a7c8}']),
('\u{a7c8}', &['\u{a7c7}']),
('\u{a7c9}', &['\u{a7ca}']),
('\u{a7ca}', &['\u{a7c9}']),
('\u{a7f5}', &['\u{a7f6}']),
('\u{a7f6}', &['\u{a7f5}']),
('ꭓ', &['']),
('ꭰ', &['']),
('ꭱ', &['']),

The diff for this file is not shown because of its size.


@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate grapheme-cluster-break /tmp/ucd/12.1.0/ --chars
// ucd-generate grapheme-cluster-break ucd-13.0.0 --chars
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("CR", CR),
@ -105,7 +107,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{b3e}', '\u{b3f}'),
('\u{b41}', '\u{b44}'),
('\u{b4d}', '\u{b4d}'),
('\u{b56}', '\u{b57}'),
('\u{b55}', '\u{b57}'),
('\u{b62}', '\u{b63}'),
('\u{b82}', '\u{b82}'),
('\u{bbe}', '\u{bbe}'),
@ -134,6 +136,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{d4d}', '\u{d4d}'),
('\u{d57}', '\u{d57}'),
('\u{d62}', '\u{d63}'),
('\u{d81}', '\u{d81}'),
('\u{dca}', '\u{dca}'),
('\u{dcf}', '\u{dcf}'),
('\u{dd2}', '\u{dd4}'),
@ -192,7 +195,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{1a65}', '\u{1a6c}'),
('\u{1a73}', '\u{1a7c}'),
('\u{1a7f}', '\u{1a7f}'),
('\u{1ab0}', '\u{1abe}'),
('\u{1ab0}', '\u{1ac0}'),
('\u{1b00}', '\u{1b03}'),
('\u{1b34}', '\u{1b3a}'),
('\u{1b3c}', '\u{1b3c}'),
@ -231,6 +234,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{a806}', '\u{a806}'),
('\u{a80b}', '\u{a80b}'),
('\u{a825}', '\u{a826}'),
('\u{a82c}', '\u{a82c}'),
('\u{a8c4}', '\u{a8c5}'),
('\u{a8e0}', '\u{a8f1}'),
('\u{a8ff}', '\u{a8ff}'),
@ -239,7 +243,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{a980}', '\u{a982}'),
('\u{a9b3}', '\u{a9b3}'),
('\u{a9b6}', '\u{a9b9}'),
('\u{a9bc}', ''),
('\u{a9bc}', '\u{a9bd}'),
('\u{a9e5}', '\u{a9e5}'),
('\u{aa29}', '\u{aa2e}'),
('\u{aa31}', '\u{aa32}'),
@ -271,6 +275,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{10a3f}', '\u{10a3f}'),
('\u{10ae5}', '\u{10ae6}'),
('\u{10d24}', '\u{10d27}'),
('\u{10eab}', '\u{10eac}'),
('\u{10f46}', '\u{10f50}'),
('\u{11001}', '\u{11001}'),
('\u{11038}', '\u{11046}'),
@ -284,6 +289,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{11180}', '\u{11181}'),
('\u{111b6}', '\u{111be}'),
('\u{111c9}', '\u{111cc}'),
('\u{111cf}', '\u{111cf}'),
('\u{1122f}', '\u{11231}'),
('\u{11234}', '\u{11234}'),
('\u{11236}', '\u{11237}'),
@ -324,6 +330,10 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{11727}', '\u{1172b}'),
('\u{1182f}', '\u{11837}'),
('\u{11839}', '\u{1183a}'),
('\u{11930}', '\u{11930}'),
('\u{1193b}', '\u{1193c}'),
('\u{1193e}', '\u{1193e}'),
('\u{11943}', '\u{11943}'),
('\u{119d4}', '\u{119d7}'),
('\u{119da}', '\u{119db}'),
('\u{119e0}', '\u{119e0}'),
@ -355,6 +365,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{16b30}', '\u{16b36}'),
('\u{16f4f}', '\u{16f4f}'),
('\u{16f8f}', '\u{16f92}'),
('\u{16fe4}', '\u{16fe4}'),
('\u{1bc9d}', '\u{1bc9e}'),
('\u{1d165}', '\u{1d165}'),
('\u{1d167}', '\u{1d169}'),
@ -1200,8 +1211,10 @@ pub const PREPEND: &'static [(char, char)] = &[
('\u{110bd}', '\u{110bd}'),
('\u{110cd}', '\u{110cd}'),
('𑇂', '𑇃'),
('\u{1193f}', '\u{1193f}'),
('\u{11941}', '\u{11941}'),
('𑨺', '𑨺'),
('\u{11a84}', '𑪉'),
('𑪄', '𑪉'),
('𑵆', '𑵆'),
];
@ -1311,6 +1324,7 @@ pub const SPACINGMARK: &'static [(char, char)] = &[
('𑆂', '𑆂'),
('𑆳', '𑆵'),
('𑆿', '𑇀'),
('\u{111ce}', '\u{111ce}'),
('𑈬', '𑈮'),
('𑈲', '𑈳'),
('𑈵', '𑈵'),
@ -1342,9 +1356,14 @@ pub const SPACINGMARK: &'static [(char, char)] = &[
('𑜦', '𑜦'),
('𑠬', '𑠮'),
('𑠸', '𑠸'),
('\u{119d1}', '\u{119d3}'),
('\u{119dc}', '\u{119df}'),
('\u{119e4}', '\u{119e4}'),
('\u{11931}', '\u{11935}'),
('\u{11937}', '\u{11938}'),
('\u{1193d}', '\u{1193d}'),
('\u{11940}', '\u{11940}'),
('\u{11942}', '\u{11942}'),
('𑧑', '𑧓'),
('𑧜', '𑧟'),
('𑧤', '𑧤'),
('𑨹', '𑨹'),
('𑩗', '𑩘'),
('𑪗', '𑪗'),
@ -1357,7 +1376,8 @@ pub const SPACINGMARK: &'static [(char, char)] = &[
('𑶓', '𑶔'),
('𑶖', '𑶖'),
('𑻵', '𑻶'),
('𖽑', '\u{16f87}'),
('𖽑', '𖾇'),
('\u{16ff0}', '\u{16ff1}'),
('𝅦', '𝅦'),
('𝅭', '𝅭'),
];


@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate general-category /tmp/ucd/12.1.0/ --chars --include decimalnumber
// ucd-generate general-category ucd-13.0.0 --chars --include decimalnumber
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] =
&[("Decimal_Number", DECIMAL_NUMBER)];
@ -58,13 +60,15 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[
('𑛀', '𑛉'),
('𑜰', '𑜹'),
('𑣠', '𑣩'),
('\u{11950}', '\u{11959}'),
('𑱐', '𑱙'),
('𑵐', '𑵙'),
('𑶠', '𑶩'),
('𖩠', '𖩩'),
('𖭐', '𖭙'),
('𝟎', '𝟿'),
('\u{1e140}', '\u{1e149}'),
('\u{1e2f0}', '\u{1e2f9}'),
('𞅀', '𞅉'),
('𞋰', '𞋹'),
('𞥐', '𞥙'),
('\u{1fbf0}', '\u{1fbf9}'),
];


@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate property-bool /tmp/ucd/12.1.0/ --chars --include whitespace
// ucd-generate property-bool ucd-13.0.0 --chars --include whitespace
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] =
&[("White_Space", WHITE_SPACE)];


@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate perl-word /tmp/ucd/12.1.0/ --chars
// ucd-generate perl-word ucd-13.0.0 --chars
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const PERL_WORD: &'static [(char, char)] = &[
('0', '9'),
@ -56,7 +58,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('ࡀ', '\u{85b}'),
('ࡠ', 'ࡪ'),
('ࢠ', 'ࢴ'),
('ࢶ', ''),
('ࢶ', '\u{8c7}'),
('\u{8d3}', '\u{8e1}'),
('\u{8e3}', '\u{963}'),
('', '९'),
@ -116,7 +118,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('\u{b3c}', '\u{b44}'),
('େ', 'ୈ'),
('ୋ', '\u{b4d}'),
('\u{b56}', '\u{b57}'),
('\u{b55}', '\u{b57}'),
('ଡ଼', 'ଢ଼'),
('ୟ', '\u{b63}'),
('', '୯'),
@ -162,8 +164,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('ೠ', '\u{ce3}'),
('', '೯'),
('ೱ', 'ೲ'),
('\u{d00}', 'ഃ'),
('അ', 'ഌ'),
('\u{d00}', 'ഌ'),
('എ', 'ഐ'),
('ഒ', '\u{d44}'),
('െ', 'ൈ'),
@ -172,7 +173,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('ൟ', '\u{d63}'),
('', '൯'),
('ൺ', 'ൿ'),
('', 'ඃ'),
('\u{d81}', 'ඃ'),
('අ', 'ඖ'),
('ක', 'න'),
('ඳ', 'ර'),
@ -189,8 +190,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('', '๙'),
('ກ', 'ຂ'),
('ຄ', 'ຄ'),
('\u{e86}', 'ຊ'),
('\u{e8c}', 'ຣ'),
('', 'ຊ'),
('', 'ຣ'),
('ລ', 'ລ'),
('ວ', 'ຽ'),
('ເ', 'ໄ'),
@ -271,7 +272,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('\u{1a7f}', '᪉'),
('᪐', '᪙'),
('ᪧ', 'ᪧ'),
('\u{1ab0}', '\u{1abe}'),
('\u{1ab0}', '\u{1ac0}'),
('\u{1b00}', 'ᭋ'),
('᭐', '᭙'),
('\u{1b6b}', '\u{1b73}'),
@ -283,7 +284,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('Ა', 'Ჺ'),
('Ჽ', 'Ჿ'),
('\u{1cd0}', '\u{1cd2}'),
('\u{1cd4}', '\u{1cfa}'),
('\u{1cd4}', ''),
('ᴀ', '\u{1df9}'),
('\u{1dfb}', 'ἕ'),
('Ἐ', 'Ἕ'),
@ -357,10 +358,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('ー', 'ヿ'),
('ㄅ', 'ㄯ'),
('ㄱ', 'ㆎ'),
('ㆠ', ''),
('ㆠ', '\u{31bf}'),
('ㇰ', 'ㇿ'),
('㐀', ''),
('一', ''),
('㐀', '\u{4dbf}'),
('一', '\u{9ffc}'),
('ꀀ', 'ꒌ'),
('', ''),
('ꔀ', 'ꘌ'),
@ -370,9 +371,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('ꙿ', '\u{a6f1}'),
('ꜗ', 'ꜟ'),
('Ꜣ', 'ꞈ'),
('Ꞌ', '\u{a7bf}'),
('\u{a7c2}', '\u{a7c6}'),
('ꟷ', 'ꠧ'),
('Ꞌ', 'ꞿ'),
('Ꟃ', '\u{a7ca}'),
('\u{a7f5}', 'ꠧ'),
('\u{a82c}', '\u{a82c}'),
('ꡀ', 'ꡳ'),
('ꢀ', '\u{a8c5}'),
('꣐', '꣙'),
@ -398,7 +400,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('ꬠ', 'ꬦ'),
('ꬨ', 'ꬮ'),
('ꬰ', ''),
('ꭜ', '\u{ab67}'),
('ꭜ', '\u{ab69}'),
('ꭰ', 'ꯪ'),
('꯬', '\u{abed}'),
('꯰', '꯹'),
@ -497,10 +499,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𐳀', '𐳲'),
('𐴀', '\u{10d27}'),
('𐴰', '𐴹'),
('\u{10e80}', '\u{10ea9}'),
('\u{10eab}', '\u{10eac}'),
('\u{10eb0}', '\u{10eb1}'),
('𐼀', '𐼜'),
('𐼧', '𐼧'),
('𐼰', '\u{10f50}'),
('\u{10fe0}', '\u{10ff6}'),
('\u{10fb0}', '\u{10fc4}'),
('𐿠', '𐿶'),
('𑀀', '\u{11046}'),
('𑁦', '𑁯'),
('\u{1107f}', '\u{110ba}'),
@ -508,12 +514,12 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𑃰', '𑃹'),
('\u{11100}', '\u{11134}'),
('𑄶', '𑄿'),
('𑅄', '𑅆'),
('𑅄', '\u{11147}'),
('𑅐', '\u{11173}'),
('𑅶', '𑅶'),
('\u{11180}', '𑇄'),
('\u{111c9}', '\u{111cc}'),
('𑇐', '𑇚'),
('\u{111ce}', '𑇚'),
('𑇜', '𑇜'),
('𑈀', '𑈑'),
('𑈓', '\u{11237}'),
@ -542,7 +548,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('\u{11370}', '\u{11374}'),
('𑐀', '𑑊'),
('𑑐', '𑑙'),
('\u{1145e}', '\u{1145f}'),
('\u{1145e}', '\u{11461}'),
('𑒀', '𑓅'),
('𑓇', '𑓇'),
('𑓐', '𑓙'),
@ -552,18 +558,25 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𑘀', '\u{11640}'),
('𑙄', '𑙄'),
('𑙐', '𑙙'),
('𑚀', '\u{116b8}'),
('𑚀', '𑚸'),
('𑛀', '𑛉'),
('𑜀', '𑜚'),
('\u{1171d}', '\u{1172b}'),
('𑜰', '𑜹'),
('𑠀', '\u{1183a}'),
('𑢠', '𑣩'),
('𑣿', '𑣿'),
('\u{119a0}', '\u{119a7}'),
('\u{119aa}', '\u{119d7}'),
('\u{119da}', '\u{119e1}'),
('\u{119e3}', '\u{119e4}'),
('𑣿', '\u{11906}'),
('\u{11909}', '\u{11909}'),
('\u{1190c}', '\u{11913}'),
('\u{11915}', '\u{11916}'),
('\u{11918}', '\u{11935}'),
('\u{11937}', '\u{11938}'),
('\u{1193b}', '\u{11943}'),
('\u{11950}', '\u{11959}'),
('𑦠', '𑦧'),
('𑦪', '\u{119d7}'),
('\u{119da}', '𑧡'),
('𑧣', '𑧤'),
('𑨀', '\u{11a3e}'),
('\u{11a47}', '\u{11a47}'),
('𑩐', '\u{11a99}'),
@ -590,6 +603,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𑶓', '𑶘'),
('𑶠', '𑶩'),
('𑻠', '𑻶'),
('\u{11fb0}', '\u{11fb0}'),
('𒀀', '𒎙'),
('𒐀', '𒑮'),
('𒒀', '𒕃'),
@ -606,16 +620,18 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𖭣', '𖭷'),
('𖭽', '𖮏'),
('𖹀', '𖹿'),
('𖼀', '\u{16f4a}'),
('\u{16f4f}', '\u{16f87}'),
('𖼀', '𖽊'),
('\u{16f4f}', '𖾇'),
('\u{16f8f}', '𖾟'),
('𖿠', '𖿡'),
('\u{16fe3}', '\u{16fe3}'),
('𗀀', '\u{187f7}'),
('𘠀', '𘫲'),
('𖿣', '\u{16fe4}'),
('\u{16ff0}', '\u{16ff1}'),
('𗀀', '𘟷'),
('𘠀', '\u{18cd5}'),
('\u{18d00}', '\u{18d08}'),
('𛀀', '𛄞'),
('\u{1b150}', '\u{1b152}'),
('\u{1b164}', '\u{1b167}'),
('𛅐', '𛅒'),
('𛅤', '𛅧'),
('𛅰', '𛋻'),
('𛰀', '𛱪'),
('𛱰', '𛱼'),
@ -670,14 +686,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('\u{1e01b}', '\u{1e021}'),
('\u{1e023}', '\u{1e024}'),
('\u{1e026}', '\u{1e02a}'),
('\u{1e100}', '\u{1e12c}'),
('\u{1e130}', '\u{1e13d}'),
('\u{1e140}', '\u{1e149}'),
('\u{1e14e}', '\u{1e14e}'),
('\u{1e2c0}', '\u{1e2f9}'),
('𞄀', '𞄬'),
('\u{1e130}', '𞄽'),
('𞅀', '𞅉'),
('𞅎', '𞅎'),
('𞋀', '𞋹'),
('𞠀', '𞣄'),
('\u{1e8d0}', '\u{1e8d6}'),
('𞤀', '\u{1e94b}'),
('𞤀', '𞥋'),
('𞥐', '𞥙'),
('𞸀', '𞸃'),
('𞸅', '𞸟'),
@ -715,11 +731,13 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('🄰', '🅉'),
('🅐', '🅩'),
('🅰', '🆉'),
('𠀀', '𪛖'),
('\u{1fbf0}', '\u{1fbf9}'),
('𠀀', '\u{2a6dd}'),
('𪜀', '𫜴'),
('𫝀', '𫠝'),
('𫠠', '𬺡'),
('𬺰', '𮯠'),
('丽', '𪘀'),
('\u{30000}', '\u{3134a}'),
('\u{e0100}', '\u{e01ef}'),
];

The diff for this file is not shown because of its size.


@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate property-names /tmp/ucd/12.1.0/
// ucd-generate property-names ucd-13.0.0
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
("age", "Age"),
@ -47,7 +49,9 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
("cjkirgkpsource", "kIRG_KPSource"),
("cjkirgksource", "kIRG_KSource"),
("cjkirgmsource", "kIRG_MSource"),
("cjkirgssource", "kIRG_SSource"),
("cjkirgtsource", "kIRG_TSource"),
("cjkirguksource", "kIRG_UKSource"),
("cjkirgusource", "kIRG_USource"),
("cjkirgvsource", "kIRG_VSource"),
("cjkothernumeric", "kOtherNumeric"),
@ -74,11 +78,15 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
("dt", "Decomposition_Type"),
("ea", "East_Asian_Width"),
("eastasianwidth", "East_Asian_Width"),
("ebase", "Emoji_Modifier_Base"),
("ecomp", "Emoji_Component"),
("emod", "Emoji_Modifier"),
("emoji", "Emoji"),
("emojicomponent", "Emoji_Component"),
("emojimodifier", "Emoji_Modifier"),
("emojimodifierbase", "Emoji_Modifier_Base"),
("emojipresentation", "Emoji_Presentation"),
("epres", "Emoji_Presentation"),
("equideo", "Equivalent_Unified_Ideograph"),
("equivalentunifiedideograph", "Equivalent_Unified_Ideograph"),
("expandsonnfc", "Expands_On_NFC"),
@ -88,6 +96,7 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
("ext", "Extender"),
("extendedpictographic", "Extended_Pictographic"),
("extender", "Extender"),
("extpict", "Extended_Pictographic"),
("fcnfkc", "FC_NFKC_Closure"),
("fcnfkcclosure", "FC_NFKC_Closure"),
("fullcompositionexclusion", "Full_Composition_Exclusion"),
@ -138,7 +147,9 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
("kirgkpsource", "kIRG_KPSource"),
("kirgksource", "kIRG_KSource"),
("kirgmsource", "kIRG_MSource"),
("kirgssource", "kIRG_SSource"),
("kirgtsource", "kIRG_TSource"),
("kirguksource", "kIRG_UKSource"),
("kirgusource", "kIRG_USource"),
("kirgvsource", "kIRG_VSource"),
("kothernumeric", "kOtherNumeric"),


@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate property-values /tmp/ucd/12.1.0/ --include gc,script,scx,age,gcb,wb,sb
// ucd-generate property-values ucd-13.0.0 --include gc,script,scx,age,gcb,wb,sb
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const PROPERTY_VALUES: &'static [(
&'static str,
@ -16,6 +18,7 @@ pub const PROPERTY_VALUES: &'static [(
("11.0", "V11_0"),
("12.0", "V12_0"),
("12.1", "V12_1"),
("13.0", "V13_0"),
("2.0", "V2_0"),
("2.1", "V2_1"),
("3.0", "V3_0"),
@ -40,6 +43,7 @@ pub const PROPERTY_VALUES: &'static [(
("v110", "V11_0"),
("v120", "V12_0"),
("v121", "V12_1"),
("v130", "V13_0"),
("v20", "V2_0"),
("v21", "V2_1"),
("v30", "V3_0"),
@ -224,6 +228,8 @@ pub const PROPERTY_VALUES: &'static [(
("cham", "Cham"),
("cher", "Cherokee"),
("cherokee", "Cherokee"),
("chorasmian", "Chorasmian"),
("chrs", "Chorasmian"),
("common", "Common"),
("copt", "Coptic"),
("coptic", "Coptic"),
@ -235,6 +241,8 @@ pub const PROPERTY_VALUES: &'static [(
("deseret", "Deseret"),
("deva", "Devanagari"),
("devanagari", "Devanagari"),
("diak", "Dives_Akuru"),
("divesakuru", "Dives_Akuru"),
("dogr", "Dogra"),
("dogra", "Dogra"),
("dsrt", "Deseret"),
@ -299,11 +307,13 @@ pub const PROPERTY_VALUES: &'static [(
("kayahli", "Kayah_Li"),
("khar", "Kharoshthi"),
("kharoshthi", "Kharoshthi"),
("khitansmallscript", "Khitan_Small_Script"),
("khmer", "Khmer"),
("khmr", "Khmer"),
("khoj", "Khojki"),
("khojki", "Khojki"),
("khudawadi", "Khudawadi"),
("kits", "Khitan_Small_Script"),
("knda", "Kannada"),
("kthi", "Kaithi"),
("lana", "Tai_Tham"),
@ -477,6 +487,8 @@ pub const PROPERTY_VALUES: &'static [(
("wcho", "Wancho"),
("xpeo", "Old_Persian"),
("xsux", "Cuneiform"),
("yezi", "Yezidi"),
("yezidi", "Yezidi"),
("yi", "Yi"),
("yiii", "Yi"),
("zanabazarsquare", "Zanabazar_Square"),
@ -533,6 +545,8 @@ pub const PROPERTY_VALUES: &'static [(
("cham", "Cham"),
("cher", "Cherokee"),
("cherokee", "Cherokee"),
("chorasmian", "Chorasmian"),
("chrs", "Chorasmian"),
("common", "Common"),
("copt", "Coptic"),
("coptic", "Coptic"),
@ -544,6 +558,8 @@ pub const PROPERTY_VALUES: &'static [(
("deseret", "Deseret"),
("deva", "Devanagari"),
("devanagari", "Devanagari"),
("diak", "Dives_Akuru"),
("divesakuru", "Dives_Akuru"),
("dogr", "Dogra"),
("dogra", "Dogra"),
("dsrt", "Deseret"),
@ -608,11 +624,13 @@ pub const PROPERTY_VALUES: &'static [(
("kayahli", "Kayah_Li"),
("khar", "Kharoshthi"),
("kharoshthi", "Kharoshthi"),
("khitansmallscript", "Khitan_Small_Script"),
("khmer", "Khmer"),
("khmr", "Khmer"),
("khoj", "Khojki"),
("khojki", "Khojki"),
("khudawadi", "Khudawadi"),
("kits", "Khitan_Small_Script"),
("knda", "Kannada"),
("kthi", "Kaithi"),
("lana", "Tai_Tham"),
@ -786,6 +804,8 @@ pub const PROPERTY_VALUES: &'static [(
("wcho", "Wancho"),
("xpeo", "Old_Persian"),
("xsux", "Cuneiform"),
("yezi", "Yezidi"),
("yezidi", "Yezidi"),
("yi", "Yi"),
("yiii", "Yi"),
("zanabazarsquare", "Zanabazar_Square"),


@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate script /tmp/ucd/12.1.0/ --chars
// ucd-generate script ucd-13.0.0 --chars
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("Adlam", ADLAM),
@ -28,6 +30,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("Chakma", CHAKMA),
("Cham", CHAM),
("Cherokee", CHEROKEE),
("Chorasmian", CHORASMIAN),
("Common", COMMON),
("Coptic", COPTIC),
("Cuneiform", CUNEIFORM),
@ -35,6 +38,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("Cyrillic", CYRILLIC),
("Deseret", DESERET),
("Devanagari", DEVANAGARI),
("Dives_Akuru", DIVES_AKURU),
("Dogra", DOGRA),
("Duployan", DUPLOYAN),
("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS),
@ -66,6 +70,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("Katakana", KATAKANA),
("Kayah_Li", KAYAH_LI),
("Kharoshthi", KHAROSHTHI),
("Khitan_Small_Script", KHITAN_SMALL_SCRIPT),
("Khmer", KHMER),
("Khojki", KHOJKI),
("Khudawadi", KHUDAWADI),
@ -155,12 +160,13 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("Vai", VAI),
("Wancho", WANCHO),
("Warang_Citi", WARANG_CITI),
("Yezidi", YEZIDI),
("Yi", YI),
("Zanabazar_Square", ZANABAZAR_SQUARE),
];
pub const ADLAM: &'static [(char, char)] =
&[('𞤀', '\u{1e94b}'), ('𞥐', '𞥙'), ('𞥞', '𞥟')];
&[('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')];
pub const AHOM: &'static [(char, char)] =
&[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜿')];
@ -180,7 +186,7 @@ pub const ARABIC: &'static [(char, char)] = &[
('۞', 'ۿ'),
('ݐ', 'ݿ'),
('ࢠ', 'ࢴ'),
('ࢶ', ''),
('ࢶ', '\u{8c7}'),
('\u{8d3}', '\u{8e1}'),
('\u{8e3}', '\u{8ff}'),
('ﭐ', '﯁'),
@ -228,13 +234,11 @@ pub const ARABIC: &'static [(char, char)] = &[
];
pub const ARMENIAN: &'static [(char, char)] =
&[('Ա', 'Ֆ'), ('ՙ', 'ֈ'), ('֊', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')];
&[('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')];
pub const AVESTAN: &'static [(char, char)] =
&[('𐬀', '𐬵'), ('𐬹', '𐬿')];
pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')];
pub const BALINESE: &'static [(char, char)] =
&[('\u{1b00}', 'ᭋ'), ('᭐', '᭼')];
pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭋ'), ('᭐', '᭼')];
pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')];
@ -260,23 +264,18 @@ pub const BENGALI: &'static [(char, char)] = &[
('', '\u{9fe}'),
];
pub const BHAIKSUKI: &'static [(char, char)] = &[
('𑰀', '𑰈'),
('𑰊', '\u{11c36}'),
('\u{11c38}', '𑱅'),
('𑱐', '𑱬'),
];
pub const BHAIKSUKI: &'static [(char, char)] =
&[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')];
pub const BOPOMOFO: &'static [(char, char)] =
&[('˪', '˫'), ('ㄅ', 'ㄯ'), ('ㆠ', '')];
&[('˪', '˫'), ('ㄅ', 'ㄯ'), ('ㆠ', '\u{31bf}')];
pub const BRAHMI: &'static [(char, char)] =
&[('𑀀', '𑁍'), ('𑁒', '𑁯'), ('\u{1107f}', '\u{1107f}')];
pub const BRAILLE: &'static [(char, char)] = &[('', '⣿')];
pub const BUGINESE: &'static [(char, char)] =
&[('ᨀ', '\u{1a1b}'), ('᨞', '᨟')];
pub const BUGINESE: &'static [(char, char)] = &[('ᨀ', '\u{1a1b}'), ('᨞', '᨟')];
pub const BUHID: &'static [(char, char)] = &[('ᝀ', '\u{1753}')];
@ -289,7 +288,7 @@ pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] =
&[('𐔰', '𐕣'), ('𐕯', '𐕯')];
pub const CHAKMA: &'static [(char, char)] =
&[('\u{11100}', '\u{11134}'), ('𑄶', '𑅆')];
&[('\u{11100}', '\u{11134}'), ('𑄶', '\u{11147}')];
pub const CHAM: &'static [(char, char)] =
&[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')];
@ -297,6 +296,8 @@ pub const CHAM: &'static [(char, char)] =
pub const CHEROKEE: &'static [(char, char)] =
&[('', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ')];
pub const CHORASMIAN: &'static [(char, char)] = &[('\u{10fb0}', '\u{10fcb}')];
pub const COMMON: &'static [(char, char)] = &[
('\u{0}', '@'),
('[', '`'),
@ -312,7 +313,6 @@ pub const COMMON: &'static [(char, char)] = &[
(';', ';'),
('΅', '΅'),
('·', '·'),
('։', '։'),
('\u{605}', '\u{605}'),
('،', '،'),
('؛', '؛'),
@ -333,7 +333,7 @@ pub const COMMON: &'static [(char, char)] = &[
('ᳩ', 'ᳬ'),
('ᳮ', 'ᳳ'),
('ᳵ', '᳷'),
('\u{1cfa}', '\u{1cfa}'),
('ᳺ', 'ᳺ'),
('\u{2000}', '\u{200b}'),
('\u{200e}', '\u{2064}'),
('\u{2066}', '⁰'),
@ -351,8 +351,8 @@ pub const COMMON: &'static [(char, char)] = &[
('①', '⟿'),
('⤀', '⭳'),
('⭶', '⮕'),
('⮘', '\u{2bff}'),
('⸀', '\u{2e4f}'),
('\u{2b97}', '⯿'),
('⸀', '\u{2e52}'),
('⿰', '⿻'),
('\u{3000}', '〄'),
('〆', '〆'),
@ -366,7 +366,7 @@ pub const COMMON: &'static [(char, char)] = &[
('㇀', '㇣'),
('㈠', '㉟'),
('㉿', '㋏'),
('\u{32ff}', '\u{32ff}'),
('㋿', '㋿'),
('㍘', '㏿'),
('䷀', '䷿'),
('꜀', '꜡'),
@ -375,6 +375,7 @@ pub const COMMON: &'static [(char, char)] = &[
('꤮', '꤮'),
('ꧏ', 'ꧏ'),
('꭛', '꭛'),
('\u{ab6a}', '\u{ab6b}'),
('', '﴿'),
('︐', '︙'),
('', '﹒'),
@ -392,10 +393,10 @@ pub const COMMON: &'static [(char, char)] = &[
('𐄀', '𐄂'),
('𐄇', '𐄳'),
('𐄷', '𐄿'),
('𐆐', '𐆛'),
('𐆐', '\u{1019c}'),
('𐇐', '𐇼'),
('𐋡', '𐋻'),
('\u{16fe2}', '\u{16fe3}'),
('𖿢', '𖿣'),
('\u{1bca0}', '\u{1bca3}'),
('𝀀', '𝃵'),
('𝄀', '𝄦'),
@ -429,45 +430,46 @@ pub const COMMON: &'static [(char, char)] = &[
('𝚨', '𝟋'),
('𝟎', '𝟿'),
('𞱱', '𞲴'),
('\u{1ed01}', '\u{1ed3d}'),
('𞴁', '𞴽'),
('🀀', '🀫'),
('🀰', '🂓'),
('🂠', '🂮'),
('🂱', '🂿'),
('🃁', '🃏'),
('🃑', '🃵'),
('🄀', '🄌'),
('🄐', '\u{1f16c}'),
('🅰', '🆬'),
('🄀', '\u{1f1ad}'),
('🇦', '🇿'),
('🈁', '🈂'),
('🈐', '🈻'),
('🉀', '🉈'),
('🉐', '🉑'),
('🉠', '🉥'),
('🌀', '\u{1f6d5}'),
('🌀', '\u{1f6d7}'),
('🛠', '🛬'),
('🛰', '\u{1f6fa}'),
('🛰', '\u{1f6fc}'),
('🜀', '🝳'),
('🞀', '🟘'),
('\u{1f7e0}', '\u{1f7eb}'),
('🟠', '🟫'),
('🠀', '🠋'),
('🠐', '🡇'),
('🡐', '🡙'),
('🡠', '🢇'),
('🢐', '🢭'),
('🤀', '🤋'),
('\u{1f90d}', '\u{1f971}'),
('🥳', '🥶'),
('🥺', '🦢'),
('\u{1f9a5}', '\u{1f9aa}'),
('\u{1f9ae}', '\u{1f9ca}'),
('\u{1f9cd}', '\u{1fa53}'),
('\u{1f8b0}', '\u{1f8b1}'),
('🤀', '\u{1f978}'),
('🥺', '\u{1f9cb}'),
('🧍', '🩓'),
('🩠', '🩭'),
('\u{1fa70}', '\u{1fa73}'),
('\u{1fa78}', '\u{1fa7a}'),
('\u{1fa80}', '\u{1fa82}'),
('\u{1fa90}', '\u{1fa95}'),
('🩰', '\u{1fa74}'),
('🩸', '🩺'),
('🪀', '\u{1fa86}'),
('🪐', '\u{1faa8}'),
('\u{1fab0}', '\u{1fab6}'),
('\u{1fac0}', '\u{1fac2}'),
('\u{1fad0}', '\u{1fad6}'),
('\u{1fb00}', '\u{1fb92}'),
('\u{1fb94}', '\u{1fbca}'),
('\u{1fbf0}', '\u{1fbf9}'),
('\u{e0001}', '\u{e0001}'),
('\u{e0020}', '\u{e007f}'),
];
@ -478,14 +480,8 @@ pub const COPTIC: &'static [(char, char)] =
pub const CUNEIFORM: &'static [(char, char)] =
&[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')];
pub const CYPRIOT: &'static [(char, char)] = &[
('𐠀', '𐠅'),
('𐠈', '𐠈'),
('𐠊', '𐠵'),
('𐠷', '𐠸'),
('𐠼', '𐠼'),
('𐠿', '𐠿'),
];
pub const CYPRIOT: &'static [(char, char)] =
&[('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐠿')];
pub const CYRILLIC: &'static [(char, char)] = &[
('Ѐ', '\u{484}'),
@ -507,22 +503,28 @@ pub const DEVANAGARI: &'static [(char, char)] = &[
('\u{a8e0}', '\u{a8ff}'),
];
pub const DIVES_AKURU: &'static [(char, char)] = &[
('\u{11900}', '\u{11906}'),
('\u{11909}', '\u{11909}'),
('\u{1190c}', '\u{11913}'),
('\u{11915}', '\u{11916}'),
('\u{11918}', '\u{11935}'),
('\u{11937}', '\u{11938}'),
('\u{1193b}', '\u{11946}'),
('\u{11950}', '\u{11959}'),
];
pub const DOGRA: &'static [(char, char)] = &[('𑠀', '𑠻')];
pub const DUPLOYAN: &'static [(char, char)] = &[
('𛰀', '𛱪'),
('𛱰', '𛱼'),
('𛲀', '𛲈'),
('𛲐', '𛲙'),
('𛲜', '𛲟'),
];
pub const DUPLOYAN: &'static [(char, char)] =
&[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '𛲟')];
pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] =
&[('𓀀', '𓐮'), ('\u{13430}', '\u{13438}')];
pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')];
pub const ELYMAIC: &'static [(char, char)] = &[('\u{10fe0}', '\u{10ff6}')];
pub const ELYMAIC: &'static [(char, char)] = &[('𐿠', '𐿶')];
pub const ETHIOPIC: &'static [(char, char)] = &[
('', 'ቈ'),
@ -694,16 +696,18 @@ pub const HAN: &'static [(char, char)] = &[
('', ''),
('〡', '〩'),
('〸', '〻'),
('㐀', ''),
('一', ''),
('㐀', '\u{4dbf}'),
('一', '\u{9ffc}'),
('豈', '舘'),
('並', '龎'),
('𠀀', '𪛖'),
('\u{16ff0}', '\u{16ff1}'),
('𠀀', '\u{2a6dd}'),
('𪜀', '𫜴'),
('𫝀', '𫠝'),
('𫠠', '𬺡'),
('𬺰', '𮯠'),
('丽', '𪘀'),
('\u{30000}', '\u{3134a}'),
];
pub const HANGUL: &'static [(char, char)] = &[
@ -743,13 +747,8 @@ pub const HEBREW: &'static [(char, char)] = &[
('צּ', 'ﭏ'),
];
pub const HIRAGANA: &'static [(char, char)] = &[
('ぁ', 'ゖ'),
('ゝ', 'ゟ'),
('𛀁', '𛄞'),
('\u{1b150}', '\u{1b152}'),
('🈀', '🈀'),
];
pub const HIRAGANA: &'static [(char, char)] =
&[('ぁ', 'ゖ'), ('ゝ', 'ゟ'), ('𛀁', '𛄞'), ('𛅐', '𛅒'), ('🈀', '🈀')];
pub const IMPERIAL_ARAMAIC: &'static [(char, char)] =
&[('𐡀', '𐡕'), ('𐡗', '𐡟')];
@ -760,7 +759,7 @@ pub const INHERITED: &'static [(char, char)] = &[
('\u{64b}', '\u{655}'),
('\u{670}', '\u{670}'),
('\u{951}', '\u{954}'),
('\u{1ab0}', '\u{1abe}'),
('\u{1ab0}', '\u{1ac0}'),
('\u{1cd0}', '\u{1cd2}'),
('\u{1cd4}', '\u{1ce0}'),
('\u{1ce2}', '\u{1ce8}'),
@ -822,11 +821,10 @@ pub const KATAKANA: &'static [(char, char)] = &[
('ヲ', 'ッ'),
('ア', 'ン'),
('𛀀', '𛀀'),
('\u{1b164}', '\u{1b167}'),
('𛅤', '𛅧'),
];
pub const KAYAH_LI: &'static [(char, char)] =
&[('꤀', '\u{a92d}'), ('꤯', '꤯')];
pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '\u{a92d}'), ('꤯', '꤯')];
pub const KHAROSHTHI: &'static [(char, char)] = &[
('𐨀', '\u{10a03}'),
@ -839,11 +837,13 @@ pub const KHAROSHTHI: &'static [(char, char)] = &[
('𐩐', '𐩘'),
];
pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] =
&[('\u{16fe4}', '\u{16fe4}'), ('\u{18b00}', '\u{18cd5}')];
pub const KHMER: &'static [(char, char)] =
&[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')];
pub const KHOJKI: &'static [(char, char)] =
&[('𑈀', '𑈑'), ('𑈓', '\u{1123e}')];
pub const KHOJKI: &'static [(char, char)] = &[('𑈀', '𑈑'), ('𑈓', '\u{1123e}')];
pub const KHUDAWADI: &'static [(char, char)] =
&[('𑊰', '\u{112ea}'), ('𑋰', '𑋹')];
@ -851,8 +851,8 @@ pub const KHUDAWADI: &'static [(char, char)] =
pub const LAO: &'static [(char, char)] = &[
('ກ', 'ຂ'),
('ຄ', 'ຄ'),
('\u{e86}', 'ຊ'),
('\u{e8c}', 'ຣ'),
('', 'ຊ'),
('', 'ຣ'),
('ລ', 'ລ'),
('ວ', 'ຽ'),
('ເ', 'ໄ'),
@ -886,12 +886,12 @@ pub const LATIN: &'static [(char, char)] = &[
('', 'ↈ'),
('Ⱡ', 'Ɀ'),
('Ꜣ', 'ꞇ'),
('Ꞌ', '\u{a7bf}'),
('\u{a7c2}', '\u{a7c6}'),
('', 'ꟿ'),
('Ꞌ', 'ꞿ'),
('Ꟃ', '\u{a7ca}'),
('\u{a7f5}', 'ꟿ'),
('ꬰ', ''),
('ꭜ', 'ꭤ'),
('\u{ab66}', '\u{ab67}'),
('ꭦ', '\u{ab69}'),
('ff', 'st'),
('', ''),
('', ''),
@ -921,20 +921,19 @@ pub const LINEAR_B: &'static [(char, char)] = &[
('𐂀', '𐃺'),
];
pub const LISU: &'static [(char, char)] = &[('', '')];
pub const LISU: &'static [(char, char)] =
&[('', ''), ('\u{11fb0}', '\u{11fb0}')];
pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')];
pub const LYDIAN: &'static [(char, char)] =
&[('𐤠', '𐤹'), ('𐤿', '𐤿')];
pub const LYDIAN: &'static [(char, char)] = &[('𐤠', '𐤹'), ('𐤿', '𐤿')];
pub const MAHAJANI: &'static [(char, char)] = &[('𑅐', '𑅶')];
pub const MAKASAR: &'static [(char, char)] = &[('𑻠', '𑻸')];
pub const MALAYALAM: &'static [(char, char)] = &[
('\u{d00}', 'ഃ'),
('അ', 'ഌ'),
('\u{d00}', 'ഌ'),
('എ', 'ഐ'),
('ഒ', '\u{d44}'),
('െ', 'ൈ'),
@ -943,8 +942,7 @@ pub const MALAYALAM: &'static [(char, char)] = &[
('', 'ൿ'),
];
pub const MANDAIC: &'static [(char, char)] =
&[('ࡀ', '\u{85b}'), ('࡞', '࡞')];
pub const MANDAIC: &'static [(char, char)] = &[('ࡀ', '\u{85b}'), ('࡞', '࡞')];
pub const MANICHAEAN: &'static [(char, char)] =
&[('𐫀', '\u{10ae6}'), ('𐫫', '𐫶')];
@ -975,14 +973,10 @@ pub const MEROITIC_CURSIVE: &'static [(char, char)] =
pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[('𐦀', '𐦟')];
pub const MIAO: &'static [(char, char)] = &[
('𖼀', '\u{16f4a}'),
('\u{16f4f}', '\u{16f87}'),
('\u{16f8f}', '𖾟'),
];
pub const MIAO: &'static [(char, char)] =
&[('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')];
pub const MODI: &'static [(char, char)] =
&[('𑘀', '𑙄'), ('𑙐', '𑙙')];
pub const MODI: &'static [(char, char)] = &[('𑘀', '𑙄'), ('𑙐', '𑙙')];
pub const MONGOLIAN: &'static [(char, char)] = &[
('᠀', '᠁'),
@ -994,46 +988,30 @@ pub const MONGOLIAN: &'static [(char, char)] = &[
('𑙠', '𑙬'),
];
pub const MRO: &'static [(char, char)] =
&[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')];
pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')];
pub const MULTANI: &'static [(char, char)] = &[
('𑊀', '𑊆'),
('𑊈', '𑊈'),
('𑊊', '𑊍'),
('𑊏', '𑊝'),
('𑊟', '𑊩'),
];
pub const MULTANI: &'static [(char, char)] =
&[('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')];
pub const MYANMAR: &'static [(char, char)] =
&[('က', '႟'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ')];
pub const NABATAEAN: &'static [(char, char)] =
&[('𐢀', '𐢞'), ('𐢧', '𐢯')];
pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')];
pub const NANDINAGARI: &'static [(char, char)] = &[
('\u{119a0}', '\u{119a7}'),
('\u{119aa}', '\u{119d7}'),
('\u{119da}', '\u{119e4}'),
];
pub const NANDINAGARI: &'static [(char, char)] =
&[('𑦠', '𑦧'), ('𑦪', '\u{119d7}'), ('\u{119da}', '𑧤')];
pub const NEW_TAI_LUE: &'static [(char, char)] =
&[('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')];
pub const NEWA: &'static [(char, char)] =
&[('𑐀', '𑑙'), ('𑑛', '𑑛'), ('𑑝', '\u{1145f}')];
pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '\u{11461}')];
pub const NKO: &'static [(char, char)] = &[('߀', 'ߺ'), ('\u{7fd}', '߿')];
pub const NUSHU: &'static [(char, char)] =
&[('𖿡', '𖿡'), ('𛅰', '𛋻')];
pub const NUSHU: &'static [(char, char)] = &[('𖿡', '𖿡'), ('𛅰', '𛋻')];
pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] = &[
('\u{1e100}', '\u{1e12c}'),
('\u{1e130}', '\u{1e13d}'),
('\u{1e140}', '\u{1e149}'),
('\u{1e14e}', '\u{1e14f}'),
];
pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] =
&[('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅏')];
pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')];
@ -1042,15 +1020,13 @@ pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')];
pub const OLD_HUNGARIAN: &'static [(char, char)] =
&[('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿')];
pub const OLD_ITALIC: &'static [(char, char)] =
&[('𐌀', '𐌣'), ('𐌭', '𐌯')];
pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', '𐌣'), ('𐌭', '𐌯')];
pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('𐪀', '𐪟')];
pub const OLD_PERMIC: &'static [(char, char)] = &[('𐍐', '\u{1037a}')];
pub const OLD_PERSIAN: &'static [(char, char)] =
&[('𐎠', '𐏃'), ('𐏈', '𐏕')];
pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')];
pub const OLD_SOGDIAN: &'static [(char, char)] = &[('𐼀', '𐼧')];
@ -1069,25 +1045,18 @@ pub const ORIYA: &'static [(char, char)] = &[
('\u{b3c}', '\u{b44}'),
('େ', 'ୈ'),
('ୋ', '\u{b4d}'),
('\u{b56}', '\u{b57}'),
('\u{b55}', '\u{b57}'),
('ଡ଼', 'ଢ଼'),
('ୟ', '\u{b63}'),
('', '୷'),
];
pub const OSAGE: &'static [(char, char)] =
&[('𐒰', '𐓓'), ('𐓘', '𐓻')];
pub const OSAGE: &'static [(char, char)] = &[('𐒰', '𐓓'), ('𐓘', '𐓻')];
pub const OSMANYA: &'static [(char, char)] =
&[('𐒀', '𐒝'), ('𐒠', '𐒩')];
pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')];
pub const PAHAWH_HMONG: &'static [(char, char)] = &[
('𖬀', '𖭅'),
('𖭐', '𖭙'),
('𖭛', '𖭡'),
('𖭣', '𖭷'),
('𖭽', '𖮏'),
];
pub const PAHAWH_HMONG: &'static [(char, char)] =
&[('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏')];
pub const PALMYRENE: &'static [(char, char)] = &[('𐡠', '𐡿')];
@ -1095,8 +1064,7 @@ pub const PAU_CIN_HAU: &'static [(char, char)] = &[('𑫀', '𑫸')];
pub const PHAGS_PA: &'static [(char, char)] = &[('ꡀ', '꡷')];
pub const PHOENICIAN: &'static [(char, char)] =
&[('𐤀', '𐤛'), ('𐤟', '𐤟')];
pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')];
pub const PSALTER_PAHLAVI: &'static [(char, char)] =
&[('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')];
@ -1105,28 +1073,23 @@ pub const REJANG: &'static [(char, char)] = &[('ꤰ', '꥓'), ('꥟', '꥟')];
pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ')];
pub const SAMARITAN: &'static [(char, char)] =
&[('ࠀ', '\u{82d}'), ('࠰', '࠾')];
pub const SAMARITAN: &'static [(char, char)] = &[('ࠀ', '\u{82d}'), ('࠰', '࠾')];
pub const SAURASHTRA: &'static [(char, char)] =
&[('ꢀ', '\u{a8c5}'), ('꣎', '꣙')];
pub const SHARADA: &'static [(char, char)] =
&[('\u{11180}', '𑇍'), ('𑇐', '𑇟')];
pub const SHARADA: &'static [(char, char)] = &[('\u{11180}', '𑇟')];
pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', '𐑿')];
pub const SIDDHAM: &'static [(char, char)] =
&[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')];
pub const SIGNWRITING: &'static [(char, char)] = &[
('𝠀', '𝪋'),
('\u{1da9b}', '\u{1da9f}'),
('\u{1daa1}', '\u{1daaf}'),
];
pub const SIGNWRITING: &'static [(char, char)] =
&[('𝠀', '𝪋'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}')];
pub const SINHALA: &'static [(char, char)] = &[
('', 'ඃ'),
('\u{d81}', 'ඃ'),
('අ', 'ඖ'),
('ක', 'න'),
('ඳ', 'ර'),
@ -1143,21 +1106,19 @@ pub const SINHALA: &'static [(char, char)] = &[
pub const SOGDIAN: &'static [(char, char)] = &[('𐼰', '𐽙')];
pub const SORA_SOMPENG: &'static [(char, char)] =
&[('𑃐', '𑃨'), ('𑃰', '𑃹')];
pub const SORA_SOMPENG: &'static [(char, char)] = &[('𑃐', '𑃨'), ('𑃰', '𑃹')];
pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')];
pub const SUNDANESE: &'static [(char, char)] =
&[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')];
pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('ꠀ', '')];
pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('ꠀ', '\u{a82c}')];
pub const SYRIAC: &'static [(char, char)] =
&[('܀', '܍'), ('\u{70f}', '\u{74a}'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ')];
pub const TAGALOG: &'static [(char, char)] =
&[('ᜀ', 'ᜌ'), ('ᜎ', '\u{1714}')];
pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', 'ᜌ'), ('ᜎ', '\u{1714}')];
pub const TAGBANWA: &'static [(char, char)] =
&[('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')];
@ -1172,11 +1133,9 @@ pub const TAI_THAM: &'static [(char, char)] = &[
('᪠', '᪭'),
];
pub const TAI_VIET: &'static [(char, char)] =
&[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')];
pub const TAI_VIET: &'static [(char, char)] = &[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')];
pub const TAKRI: &'static [(char, char)] =
&[('𑚀', '\u{116b8}'), ('𑛀', '𑛉')];
pub const TAKRI: &'static [(char, char)] = &[('𑚀', '𑚸'), ('𑛀', '𑛉')];
pub const TAMIL: &'static [(char, char)] = &[
('\u{b82}', 'ஃ'),
@ -1195,12 +1154,16 @@ pub const TAMIL: &'static [(char, char)] = &[
('ௐ', 'ௐ'),
('\u{bd7}', '\u{bd7}'),
('', '௺'),
('\u{11fc0}', '\u{11ff1}'),
('\u{11fff}', '\u{11fff}'),
('𑿀', '𑿱'),
('𑿿', '𑿿'),
];
pub const TANGUT: &'static [(char, char)] =
&[('𖿠', '𖿠'), ('𗀀', '\u{187f7}'), ('𘠀', '𘫲')];
pub const TANGUT: &'static [(char, char)] = &[
('𖿠', '𖿠'),
('𗀀', '𘟷'),
('𘠀', '\u{18aff}'),
('\u{18d00}', '\u{18d08}'),
];
pub const TELUGU: &'static [(char, char)] = &[
('\u{c00}', 'ఌ'),
@ -1214,13 +1177,12 @@ pub const TELUGU: &'static [(char, char)] = &[
('ౘ', 'ౚ'),
('ౠ', '\u{c63}'),
('', '౯'),
('\u{c77}', '౿'),
('', '౿'),
];
pub const THAANA: &'static [(char, char)] = &[('ހ', 'ޱ')];
pub const THAI: &'static [(char, char)] =
&[('ก', '\u{e3a}'), ('เ', '๛')];
pub const THAI: &'static [(char, char)] = &[('ก', '\u{e3a}'), ('เ', '๛')];
pub const TIBETAN: &'static [(char, char)] = &[
('ༀ', 'ཇ'),
@ -1235,19 +1197,21 @@ pub const TIBETAN: &'static [(char, char)] = &[
pub const TIFINAGH: &'static [(char, char)] =
&[('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('\u{2d7f}', '\u{2d7f}')];
pub const TIRHUTA: &'static [(char, char)] =
&[('𑒀', '𑓇'), ('𑓐', '𑓙')];
pub const TIRHUTA: &'static [(char, char)] = &[('𑒀', '𑓇'), ('𑓐', '𑓙')];
pub const UGARITIC: &'static [(char, char)] =
&[('𐎀', '𐎝'), ('𐎟', '𐎟')];
pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')];
pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')];
pub const WANCHO: &'static [(char, char)] =
&[('\u{1e2c0}', '\u{1e2f9}'), ('\u{1e2ff}', '\u{1e2ff}')];
pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('𞋿', '𞋿')];
pub const WARANG_CITI: &'static [(char, char)] =
&[('𑢠', '𑣲'), ('𑣿', '𑣿')];
pub const WARANG_CITI: &'static [(char, char)] = &[('𑢠', '𑣲'), ('𑣿', '𑣿')];
pub const YEZIDI: &'static [(char, char)] = &[
('\u{10e80}', '\u{10ea9}'),
('\u{10eab}', '\u{10ead}'),
('\u{10eb0}', '\u{10eb1}'),
];
pub const YI: &'static [(char, char)] = &[('ꀀ', 'ꒌ'), ('꒐', '꓆')];


@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate script-extension /tmp/ucd/12.1.0/ --chars
// ucd-generate script-extension ucd-13.0.0 --chars
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("Adlam", ADLAM),
@ -28,6 +30,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("Chakma", CHAKMA),
("Cham", CHAM),
("Cherokee", CHEROKEE),
("Chorasmian", CHORASMIAN),
("Common", COMMON),
("Coptic", COPTIC),
("Cuneiform", CUNEIFORM),
@ -35,6 +38,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("Cyrillic", CYRILLIC),
("Deseret", DESERET),
("Devanagari", DEVANAGARI),
("Dives_Akuru", DIVES_AKURU),
("Dogra", DOGRA),
("Duployan", DUPLOYAN),
("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS),
@ -66,6 +70,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("Katakana", KATAKANA),
("Kayah_Li", KAYAH_LI),
("Kharoshthi", KHAROSHTHI),
("Khitan_Small_Script", KHITAN_SMALL_SCRIPT),
("Khmer", KHMER),
("Khojki", KHOJKI),
("Khudawadi", KHUDAWADI),
@ -155,12 +160,13 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("Vai", VAI),
("Wancho", WANCHO),
("Warang_Citi", WARANG_CITI),
("Yezidi", YEZIDI),
("Yi", YI),
("Zanabazar_Square", ZANABAZAR_SQUARE),
];
pub const ADLAM: &'static [(char, char)] =
&[('ـ', 'ـ'), ('𞤀', '\u{1e94b}'), ('𞥐', '𞥙'), ('𞥞', '𞥟')];
&[('ـ', 'ـ'), ('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')];
pub const AHOM: &'static [(char, char)] =
&[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜿')];
@ -174,7 +180,7 @@ pub const ARABIC: &'static [(char, char)] = &[
('۞', 'ۿ'),
('ݐ', 'ݿ'),
('ࢠ', 'ࢴ'),
('ࢶ', ''),
('ࢶ', '\u{8c7}'),
('\u{8d3}', '\u{8e1}'),
('\u{8e3}', '\u{8ff}'),
('ﭐ', '﯁'),
@ -225,11 +231,9 @@ pub const ARABIC: &'static [(char, char)] = &[
pub const ARMENIAN: &'static [(char, char)] =
&[('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')];
pub const AVESTAN: &'static [(char, char)] =
&[('𐬀', '𐬵'), ('𐬹', '𐬿')];
pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')];
pub const BALINESE: &'static [(char, char)] =
&[('\u{1b00}', 'ᭋ'), ('᭐', '᭼')];
pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭋ'), ('᭐', '᭼')];
pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')];
@ -267,12 +271,8 @@ pub const BENGALI: &'static [(char, char)] = &[
('\u{a8f1}', '\u{a8f1}'),
];
pub const BHAIKSUKI: &'static [(char, char)] = &[
('𑰀', '𑰈'),
('𑰊', '\u{11c36}'),
('\u{11c38}', '𑱅'),
('𑱐', '𑱬'),
];
pub const BHAIKSUKI: &'static [(char, char)] =
&[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')];
pub const BOPOMOFO: &'static [(char, char)] = &[
('˪', '˫'),
@ -284,7 +284,7 @@ pub const BOPOMOFO: &'static [(char, char)] = &[
('〷', '〷'),
('・', '・'),
('ㄅ', 'ㄯ'),
('ㆠ', ''),
('ㆠ', '\u{31bf}'),
('﹅', '﹆'),
('。', '・'),
];
@ -297,8 +297,7 @@ pub const BRAILLE: &'static [(char, char)] = &[('', '⣿')];
pub const BUGINESE: &'static [(char, char)] =
&[('ᨀ', '\u{1a1b}'), ('᨞', '᨟'), ('ꧏ', 'ꧏ')];
pub const BUHID: &'static [(char, char)] =
&[('', '᜶'), ('ᝀ', '\u{1753}')];
pub const BUHID: &'static [(char, char)] = &[('', '᜶'), ('ᝀ', '\u{1753}')];
pub const CANADIAN_ABORIGINAL: &'static [(char, char)] =
&[('', 'ᙿ'), ('ᢰ', 'ᣵ')];
@ -308,12 +307,8 @@ pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')];
pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] =
&[('𐔰', '𐕣'), ('𐕯', '𐕯')];
pub const CHAKMA: &'static [(char, char)] = &[
('', '৯'),
('', '၉'),
('\u{11100}', '\u{11134}'),
('𑄶', '𑅆'),
];
pub const CHAKMA: &'static [(char, char)] =
&[('', '৯'), ('', '၉'), ('\u{11100}', '\u{11134}'), ('𑄶', '\u{11147}')];
pub const CHAM: &'static [(char, char)] =
&[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')];
@ -321,6 +316,8 @@ pub const CHAM: &'static [(char, char)] =
pub const CHEROKEE: &'static [(char, char)] =
&[('', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ')];
pub const CHORASMIAN: &'static [(char, char)] = &[('\u{10fb0}', '\u{10fcb}')];
pub const COMMON: &'static [(char, char)] = &[
('\u{0}', '@'),
('[', '`'),
@ -360,9 +357,9 @@ pub const COMMON: &'static [(char, char)] = &[
('①', '⟿'),
('⤀', '⭳'),
('⭶', '⮕'),
('⮘', '\u{2bff}'),
('\u{2b97}', '⯿'),
('⸀', '⹂'),
('⹄', '\u{2e4f}'),
('⹄', '\u{2e52}'),
('⿰', '⿻'),
('\u{3000}', '\u{3000}'),
('〄', '〄'),
@ -377,9 +374,10 @@ pub const COMMON: &'static [(char, char)] = &[
('㎀', '㏟'),
('㏿', '㏿'),
('䷀', '䷿'),
('', '꜡'),
('', '꜡'),
('ꞈ', '꞊'),
('꭛', '꭛'),
('\u{ab6a}', '\u{ab6b}'),
('', '﴿'),
('︐', '︙'),
('', '﹄'),
@ -393,9 +391,9 @@ pub const COMMON: &'static [(char, char)] = &[
('¢', '₩'),
('', '○'),
('\u{fff9}', '�'),
('𐆐', '𐆛'),
('𐆐', '\u{1019c}'),
('𐇐', '𐇼'),
('\u{16fe2}', '\u{16fe3}'),
('𖿢', '𖿣'),
('𝀀', '𝃵'),
('𝄀', '𝄦'),
('𝄩', '𝅦'),
@ -428,44 +426,45 @@ pub const COMMON: &'static [(char, char)] = &[
('𝚨', '𝟋'),
('𝟎', '𝟿'),
('𞱱', '𞲴'),
('\u{1ed01}', '\u{1ed3d}'),
('𞴁', '𞴽'),
('🀀', '🀫'),
('🀰', '🂓'),
('🂠', '🂮'),
('🂱', '🂿'),
('🃁', '🃏'),
('🃑', '🃵'),
('🄀', '🄌'),
('🄐', '\u{1f16c}'),
('🅰', '🆬'),
('🄀', '\u{1f1ad}'),
('🇦', '🇿'),
('🈁', '🈂'),
('🈐', '🈻'),
('🉀', '🉈'),
('🉠', '🉥'),
('🌀', '\u{1f6d5}'),
('🌀', '\u{1f6d7}'),
('🛠', '🛬'),
('🛰', '\u{1f6fa}'),
('🛰', '\u{1f6fc}'),
('🜀', '🝳'),
('🞀', '🟘'),
('\u{1f7e0}', '\u{1f7eb}'),
('🟠', '🟫'),
('🠀', '🠋'),
('🠐', '🡇'),
('🡐', '🡙'),
('🡠', '🢇'),
('🢐', '🢭'),
('🤀', '🤋'),
('\u{1f90d}', '\u{1f971}'),
('🥳', '🥶'),
('🥺', '🦢'),
('\u{1f9a5}', '\u{1f9aa}'),
('\u{1f9ae}', '\u{1f9ca}'),
('\u{1f9cd}', '\u{1fa53}'),
('\u{1f8b0}', '\u{1f8b1}'),
('🤀', '\u{1f978}'),
('🥺', '\u{1f9cb}'),
('🧍', '🩓'),
('🩠', '🩭'),
('\u{1fa70}', '\u{1fa73}'),
('\u{1fa78}', '\u{1fa7a}'),
('\u{1fa80}', '\u{1fa82}'),
('\u{1fa90}', '\u{1fa95}'),
('🩰', '\u{1fa74}'),
('🩸', '🩺'),
('🪀', '\u{1fa86}'),
('🪐', '\u{1faa8}'),
('\u{1fab0}', '\u{1fab6}'),
('\u{1fac0}', '\u{1fac2}'),
('\u{1fad0}', '\u{1fad6}'),
('\u{1fb00}', '\u{1fb92}'),
('\u{1fb94}', '\u{1fbca}'),
('\u{1fbf0}', '\u{1fbf9}'),
('\u{e0001}', '\u{e0001}'),
('\u{e0020}', '\u{e007f}'),
];
@ -493,6 +492,7 @@ pub const CYRILLIC: &'static [(char, char)] = &[
('ᲀ', 'ᲈ'),
('ᴫ', 'ᴫ'),
('ᵸ', 'ᵸ'),
('\u{1df8}', '\u{1df8}'),
('\u{2de0}', '\u{2dff}'),
('⹃', '⹃'),
('Ꙁ', '\u{a69f}'),
@ -511,23 +511,29 @@ pub const DEVANAGARI: &'static [(char, char)] = &[
('\u{a8e0}', '\u{a8ff}'),
];
pub const DIVES_AKURU: &'static [(char, char)] = &[
('\u{11900}', '\u{11906}'),
('\u{11909}', '\u{11909}'),
('\u{1190c}', '\u{11913}'),
('\u{11915}', '\u{11916}'),
('\u{11918}', '\u{11935}'),
('\u{11937}', '\u{11938}'),
('\u{1193b}', '\u{11946}'),
('\u{11950}', '\u{11959}'),
];
pub const DOGRA: &'static [(char, char)] =
&[('।', '९'), ('꠰', '꠹'), ('𑠀', '𑠻')];
pub const DUPLOYAN: &'static [(char, char)] = &[
('𛰀', '𛱪'),
('𛱰', '𛱼'),
('𛲀', '𛲈'),
('𛲐', '𛲙'),
('𛲜', '\u{1bca3}'),
];
pub const DUPLOYAN: &'static [(char, char)] =
&[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '\u{1bca3}')];
pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] =
&[('𓀀', '𓐮'), ('\u{13430}', '\u{13438}')];
pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')];
pub const ELYMAIC: &'static [(char, char)] = &[('\u{10fe0}', '\u{10ff6}')];
pub const ELYMAIC: &'static [(char, char)] = &[('𐿠', '𐿶')];
pub const ETHIOPIC: &'static [(char, char)] = &[
('', 'ቈ'),
@ -565,7 +571,6 @@ pub const ETHIOPIC: &'static [(char, char)] = &[
];
pub const GEORGIAN: &'static [(char, char)] = &[
('։', '։'),
('Ⴀ', 'Ⴥ'),
('Ⴧ', 'Ⴧ'),
('Ⴭ', 'Ⴭ'),
@ -617,8 +622,8 @@ pub const GRANTHA: &'static [(char, char)] = &[
('𑍝', '𑍣'),
('\u{11366}', '\u{1136c}'),
('\u{11370}', '\u{11374}'),
('\u{11fd0}', '\u{11fd1}'),
('\u{11fd3}', '\u{11fd3}'),
('𑿐', '𑿑'),
('𑿓', '𑿓'),
];
pub const GREEK: &'static [(char, char)] = &[
@ -730,24 +735,27 @@ pub const HAN: &'static [(char, char)] = &[
('㈠', '㉇'),
('㊀', '㊰'),
('㋀', '㋋'),
('\u{32ff}', '\u{32ff}'),
('㋿', '㋿'),
('㍘', '㍰'),
('㍻', '㍿'),
('㏠', '㏾'),
('㐀', '䶵'),
('一', '鿯'),
('㐀', '\u{4dbf}'),
('一', '\u{9ffc}'),
('꜀', '꜇'),
('豈', '舘'),
('並', '龎'),
('﹅', '﹆'),
('。', '・'),
('\u{16ff0}', '\u{16ff1}'),
('𝍠', '𝍱'),
('🉐', '🉑'),
('𠀀', '𪛖'),
('𠀀', '\u{2a6dd}'),
('𪜀', '𫜴'),
('𫝀', '𫠝'),
('𫠠', '𬺡'),
('𬺰', '𮯠'),
('丽', '𪘀'),
('\u{30000}', '\u{3134a}'),
];
pub const HANGUL: &'static [(char, char)] = &[
@ -816,7 +824,7 @@ pub const HIRAGANA: &'static [(char, char)] = &[
('ー', 'ー'),
('\u{ff9e}', '\u{ff9f}'),
('𛀁', '𛄞'),
('\u{1b150}', '\u{1b152}'),
('𛅐', '𛅒'),
('🈀', '🈀'),
];
@ -828,8 +836,9 @@ pub const INHERITED: &'static [(char, char)] = &[
('\u{343}', '\u{344}'),
('\u{346}', '\u{362}'),
('\u{953}', '\u{954}'),
('\u{1ab0}', '\u{1abe}'),
('\u{1dc2}', '\u{1df9}'),
('\u{1ab0}', '\u{1ac0}'),
('\u{1dc2}', '\u{1df7}'),
('\u{1df9}', '\u{1df9}'),
('\u{1dfb}', '\u{1dff}'),
('\u{200c}', '\u{200d}'),
('\u{20d0}', '\u{20ef}'),
@ -852,12 +861,8 @@ pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] =
pub const JAVANESE: &'static [(char, char)] =
&[('\u{a980}', '꧍'), ('ꧏ', '꧙'), ('꧞', '꧟')];
pub const KAITHI: &'static [(char, char)] = &[
('', '९'),
('꠰', '꠹'),
('\u{11080}', '𑃁'),
('\u{110cd}', '\u{110cd}'),
];
pub const KAITHI: &'static [(char, char)] =
&[('', '९'), ('꠰', '꠹'), ('\u{11080}', '𑃁'), ('\u{110cd}', '\u{110cd}')];
pub const KANNADA: &'static [(char, char)] = &[
('\u{951}', '\u{952}'),
@ -898,7 +903,7 @@ pub const KATAKANA: &'static [(char, char)] = &[
('﹅', '﹆'),
('。', '\u{ff9f}'),
('𛀀', '𛀀'),
('\u{1b164}', '\u{1b167}'),
('𛅤', '𛅧'),
];
pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '꤯')];
@ -914,6 +919,9 @@ pub const KHAROSHTHI: &'static [(char, char)] = &[
('𐩐', '𐩘'),
];
pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] =
&[('\u{16fe4}', '\u{16fe4}'), ('\u{18b00}', '\u{18cd5}')];
pub const KHMER: &'static [(char, char)] =
&[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')];
@ -926,8 +934,8 @@ pub const KHUDAWADI: &'static [(char, char)] =
pub const LAO: &'static [(char, char)] = &[
('ກ', 'ຂ'),
('ຄ', 'ຄ'),
('\u{e86}', 'ຊ'),
('\u{e8c}', 'ຣ'),
('', 'ຊ'),
('', 'ຣ'),
('ລ', 'ລ'),
('ວ', 'ຽ'),
('ເ', 'ໄ'),
@ -966,14 +974,15 @@ pub const LATIN: &'static [(char, char)] = &[
('ⅎ', 'ⅎ'),
('', 'ↈ'),
('Ⱡ', 'Ɀ'),
('꜀', '꜇'),
('Ꜣ', 'ꞇ'),
('Ꞌ', '\u{a7bf}'),
('\u{a7c2}', '\u{a7c6}'),
('', 'ꟿ'),
('Ꞌ', 'ꞿ'),
('Ꟃ', '\u{a7ca}'),
('\u{a7f5}', 'ꟿ'),
('꤮', '꤮'),
('ꬰ', ''),
('ꭜ', 'ꭤ'),
('\u{ab66}', '\u{ab67}'),
('ꭦ', '\u{ab69}'),
('ff', 'st'),
('', ''),
('', ''),
@ -1007,12 +1016,12 @@ pub const LINEAR_B: &'static [(char, char)] = &[
('𐄷', '𐄿'),
];
pub const LISU: &'static [(char, char)] = &[('', '')];
pub const LISU: &'static [(char, char)] =
&[('', ''), ('\u{11fb0}', '\u{11fb0}')];
pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')];
pub const LYDIAN: &'static [(char, char)] =
&[('𐤠', '𐤹'), ('𐤿', '𐤿')];
pub const LYDIAN: &'static [(char, char)] = &[('𐤠', '𐤹'), ('𐤿', '𐤿')];
pub const MAHAJANI: &'static [(char, char)] =
&[('।', '९'), ('꠰', '꠹'), ('𑅐', '𑅶')];
@ -1022,8 +1031,7 @@ pub const MAKASAR: &'static [(char, char)] = &[('𑻠', '𑻸')];
pub const MALAYALAM: &'static [(char, char)] = &[
('\u{951}', '\u{952}'),
('।', '॥'),
('\u{d00}', 'ഃ'),
('അ', 'ഌ'),
('\u{d00}', 'ഌ'),
('എ', 'ഐ'),
('ഒ', '\u{d44}'),
('െ', 'ൈ'),
@ -1067,11 +1075,8 @@ pub const MEROITIC_CURSIVE: &'static [(char, char)] =
pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[('𐦀', '𐦟')];
pub const MIAO: &'static [(char, char)] = &[
('𖼀', '\u{16f4a}'),
('\u{16f4f}', '\u{16f87}'),
('\u{16f8f}', '𖾟'),
];
pub const MIAO: &'static [(char, char)] =
&[('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')];
pub const MODI: &'static [(char, char)] =
&[('꠰', '꠹'), ('𑘀', '𑙄'), ('𑙐', '𑙙')];
@ -1085,53 +1090,39 @@ pub const MONGOLIAN: &'static [(char, char)] = &[
('𑙠', '𑙬'),
];
pub const MRO: &'static [(char, char)] =
&[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')];
pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')];
pub const MULTANI: &'static [(char, char)] = &[
('', '੯'),
('𑊀', '𑊆'),
('𑊈', '𑊈'),
('𑊊', '𑊍'),
('𑊏', '𑊝'),
('𑊟', '𑊩'),
];
pub const MULTANI: &'static [(char, char)] =
&[('', '੯'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')];
pub const MYANMAR: &'static [(char, char)] =
&[('က', '႟'), ('꤮', '꤮'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ')];
pub const NABATAEAN: &'static [(char, char)] =
&[('𐢀', '𐢞'), ('𐢧', '𐢯')];
pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')];
pub const NANDINAGARI: &'static [(char, char)] = &[
('।', '॥'),
('', '೯'),
('ᳩ', 'ᳩ'),
('ᳲ', 'ᳲ'),
('\u{1cfa}', '\u{1cfa}'),
('ᳺ', 'ᳺ'),
('꠰', '꠵'),
('\u{119a0}', '\u{119a7}'),
('\u{119aa}', '\u{119d7}'),
('\u{119da}', '\u{119e4}'),
('𑦠', '𑦧'),
('𑦪', '\u{119d7}'),
('\u{119da}', '𑧤'),
];
pub const NEW_TAI_LUE: &'static [(char, char)] =
&[('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')];
pub const NEWA: &'static [(char, char)] =
&[('𑐀', '𑑙'), ('𑑛', '𑑛'), ('𑑝', '\u{1145f}')];
pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '\u{11461}')];
pub const NKO: &'static [(char, char)] = &[('߀', 'ߺ'), ('\u{7fd}', '߿')];
pub const NUSHU: &'static [(char, char)] =
&[('𖿡', '𖿡'), ('𛅰', '𛋻')];
pub const NUSHU: &'static [(char, char)] = &[('𖿡', '𖿡'), ('𛅰', '𛋻')];
pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] = &[
('\u{1e100}', '\u{1e12c}'),
('\u{1e130}', '\u{1e13d}'),
('\u{1e140}', '\u{1e149}'),
('\u{1e14e}', '\u{1e14f}'),
];
pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] =
&[('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅏')];
pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')];
@ -1140,16 +1131,14 @@ pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')];
pub const OLD_HUNGARIAN: &'static [(char, char)] =
&[('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿')];
pub const OLD_ITALIC: &'static [(char, char)] =
&[('𐌀', '𐌣'), ('𐌭', '𐌯')];
pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', '𐌣'), ('𐌭', '𐌯')];
pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('𐪀', '𐪟')];
pub const OLD_PERMIC: &'static [(char, char)] =
&[('\u{483}', '\u{483}'), ('𐍐', '\u{1037a}')];
pub const OLD_PERSIAN: &'static [(char, char)] =
&[('𐎠', '𐏃'), ('𐏈', '𐏕')];
pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')];
pub const OLD_SOGDIAN: &'static [(char, char)] = &[('𐼀', '𐼧')];
@ -1170,7 +1159,7 @@ pub const ORIYA: &'static [(char, char)] = &[
('\u{b3c}', '\u{b44}'),
('େ', 'ୈ'),
('ୋ', '\u{b4d}'),
('\u{b56}', '\u{b57}'),
('\u{b55}', '\u{b57}'),
('ଡ଼', 'ଢ଼'),
('ୟ', '\u{b63}'),
('', '୷'),
@ -1178,19 +1167,12 @@ pub const ORIYA: &'static [(char, char)] = &[
('ᳲ', 'ᳲ'),
];
pub const OSAGE: &'static [(char, char)] =
&[('𐒰', '𐓓'), ('𐓘', '𐓻')];
pub const OSAGE: &'static [(char, char)] = &[('𐒰', '𐓓'), ('𐓘', '𐓻')];
pub const OSMANYA: &'static [(char, char)] =
&[('𐒀', '𐒝'), ('𐒠', '𐒩')];
pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')];
pub const PAHAWH_HMONG: &'static [(char, char)] = &[
('𖬀', '𖭅'),
('𖭐', '𖭙'),
('𖭛', '𖭡'),
('𖭣', '𖭷'),
('𖭽', '𖮏'),
];
pub const PAHAWH_HMONG: &'static [(char, char)] =
&[('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏')];
pub const PALMYRENE: &'static [(char, char)] = &[('𐡠', '𐡿')];
@ -1199,8 +1181,7 @@ pub const PAU_CIN_HAU: &'static [(char, char)] = &[('𑫀', '𑫸')];
pub const PHAGS_PA: &'static [(char, char)] =
&[('᠂', ''), ('᠅', '᠅'), ('ꡀ', '꡷')];
pub const PHOENICIAN: &'static [(char, char)] =
&[('𐤀', '𐤛'), ('𐤟', '𐤟')];
pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')];
pub const PSALTER_PAHLAVI: &'static [(char, char)] =
&[('ـ', 'ـ'), ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')];
@ -1209,8 +1190,7 @@ pub const REJANG: &'static [(char, char)] = &[('ꤰ', '꥓'), ('꥟', '꥟')];
pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ')];
pub const SAMARITAN: &'static [(char, char)] =
&[('ࠀ', '\u{82d}'), ('࠰', '࠾')];
pub const SAMARITAN: &'static [(char, char)] = &[('ࠀ', '\u{82d}'), ('࠰', '࠾')];
pub const SAURASHTRA: &'static [(char, char)] =
&[('ꢀ', '\u{a8c5}'), ('꣎', '꣙')];
@ -1221,8 +1201,7 @@ pub const SHARADA: &'static [(char, char)] = &[
('\u{1cd9}', '\u{1cd9}'),
('\u{1cdc}', '\u{1cdd}'),
('\u{1ce0}', '\u{1ce0}'),
('\u{11180}', '𑇍'),
('𑇐', '𑇟'),
('\u{11180}', '𑇟'),
];
pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', '𐑿')];
@ -1230,15 +1209,12 @@ pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', '𐑿')];
pub const SIDDHAM: &'static [(char, char)] =
&[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')];
pub const SIGNWRITING: &'static [(char, char)] = &[
('𝠀', '𝪋'),
('\u{1da9b}', '\u{1da9f}'),
('\u{1daa1}', '\u{1daaf}'),
];
pub const SIGNWRITING: &'static [(char, char)] =
&[('𝠀', '𝪋'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}')];
pub const SINHALA: &'static [(char, char)] = &[
('।', '॥'),
('', 'ඃ'),
('\u{d81}', 'ඃ'),
('අ', 'ඖ'),
('ක', 'න'),
('ඳ', 'ර'),
@ -1255,8 +1231,7 @@ pub const SINHALA: &'static [(char, char)] = &[
pub const SOGDIAN: &'static [(char, char)] = &[('ـ', 'ـ'), ('𐼰', '𐽙')];
pub const SORA_SOMPENG: &'static [(char, char)] =
&[('𑃐', '𑃨'), ('𑃰', '𑃹')];
pub const SORA_SOMPENG: &'static [(char, char)] = &[('𑃐', '𑃨'), ('𑃰', '𑃹')];
pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')];
@ -1264,7 +1239,7 @@ pub const SUNDANESE: &'static [(char, char)] =
&[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')];
pub const SYLOTI_NAGRI: &'static [(char, char)] =
&[('।', '॥'), ('', '৯'), ('ꠀ', '')];
&[('।', '॥'), ('', '৯'), ('ꠀ', '\u{a82c}')];
pub const SYRIAC: &'static [(char, char)] = &[
('،', '،'),
@ -1277,17 +1252,14 @@ pub const SYRIAC: &'static [(char, char)] = &[
('\u{70f}', '\u{74a}'),
('ݍ', 'ݏ'),
('ࡠ', 'ࡪ'),
('\u{1df8}', '\u{1df8}'),
];
pub const TAGALOG: &'static [(char, char)] =
&[('ᜀ', 'ᜌ'), ('ᜎ', '\u{1714}'), ('', '᜶')];
pub const TAGBANWA: &'static [(char, char)] = &[
('', '᜶'),
('ᝠ', 'ᝬ'),
('ᝮ', 'ᝰ'),
('\u{1772}', '\u{1773}'),
];
pub const TAGBANWA: &'static [(char, char)] =
&[('', '᜶'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')];
pub const TAI_LE: &'static [(char, char)] =
&[('', '၉'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ')];
@ -1300,11 +1272,10 @@ pub const TAI_THAM: &'static [(char, char)] = &[
('᪠', '᪭'),
];
pub const TAI_VIET: &'static [(char, char)] =
&[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')];
pub const TAI_VIET: &'static [(char, char)] = &[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')];
pub const TAKRI: &'static [(char, char)] =
&[('।', '॥'), ('꠰', '꠹'), ('𑚀', '\u{116b8}'), ('𑛀', '𑛉')];
&[('।', '॥'), ('꠰', '꠹'), ('𑚀', '𑚸'), ('𑛀', '𑛉')];
pub const TAMIL: &'static [(char, char)] = &[
('\u{951}', '\u{952}'),
@ -1330,12 +1301,16 @@ pub const TAMIL: &'static [(char, char)] = &[
('\u{11301}', '\u{11301}'),
('𑌃', '𑌃'),
('\u{1133b}', '\u{1133c}'),
('\u{11fc0}', '\u{11ff1}'),
('\u{11fff}', '\u{11fff}'),
('𑿀', '𑿱'),
('𑿿', '𑿿'),
];
pub const TANGUT: &'static [(char, char)] =
&[('𖿠', '𖿠'), ('𗀀', '\u{187f7}'), ('𘠀', '𘫲')];
pub const TANGUT: &'static [(char, char)] = &[
('𖿠', '𖿠'),
('𗀀', '𘟷'),
('𘠀', '\u{18aff}'),
('\u{18d00}', '\u{18d08}'),
];
pub const TELUGU: &'static [(char, char)] = &[
('\u{951}', '\u{952}'),
@ -1351,7 +1326,7 @@ pub const TELUGU: &'static [(char, char)] = &[
('ౘ', 'ౚ'),
('ౠ', '\u{c63}'),
('', '౯'),
('\u{c77}', '౿'),
('', '౿'),
('\u{1cda}', '\u{1cda}'),
('ᳲ', 'ᳲ'),
];
@ -1366,8 +1341,7 @@ pub const THAANA: &'static [(char, char)] = &[
('﷽', '﷽'),
];
pub const THAI: &'static [(char, char)] =
&[('ก', '\u{e3a}'), ('เ', '๛')];
pub const THAI: &'static [(char, char)] = &[('ก', '\u{e3a}'), ('เ', '๛')];
pub const TIBETAN: &'static [(char, char)] = &[
('ༀ', 'ཇ'),
@ -1391,16 +1365,23 @@ pub const TIRHUTA: &'static [(char, char)] = &[
('𑓐', '𑓙'),
];
pub const UGARITIC: &'static [(char, char)] =
&[('𐎀', '𐎝'), ('𐎟', '𐎟')];
pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')];
pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')];
pub const WANCHO: &'static [(char, char)] =
&[('\u{1e2c0}', '\u{1e2f9}'), ('\u{1e2ff}', '\u{1e2ff}')];
pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('𞋿', '𞋿')];
pub const WARANG_CITI: &'static [(char, char)] =
&[('𑢠', '𑣲'), ('𑣿', '𑣿')];
pub const WARANG_CITI: &'static [(char, char)] = &[('𑢠', '𑣲'), ('𑣿', '𑣿')];
pub const YEZIDI: &'static [(char, char)] = &[
('،', '،'),
('؛', '؛'),
('؟', '؟'),
('٠', '٩'),
('\u{10e80}', '\u{10ea9}'),
('\u{10eab}', '\u{10ead}'),
('\u{10eb0}', '\u{10eb1}'),
];
pub const YI: &'static [(char, char)] = &[
('、', '。'),


@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate sentence-break /tmp/ucd/12.1.0/ --chars
// ucd-generate sentence-break ucd-13.0.0 --chars
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("ATerm", ATERM),
@ -132,7 +134,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{b3e}', '\u{b44}'),
('େ', 'ୈ'),
('ୋ', '\u{b4d}'),
('\u{b56}', '\u{b57}'),
('\u{b55}', '\u{b57}'),
('\u{b62}', '\u{b63}'),
('\u{b82}', '\u{b82}'),
('\u{bbe}', 'ூ'),
@ -159,7 +161,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('ൊ', '\u{d4d}'),
('\u{d57}', '\u{d57}'),
('\u{d62}', '\u{d63}'),
('', 'ඃ'),
('\u{d81}', 'ඃ'),
('\u{dca}', '\u{dca}'),
('\u{dcf}', '\u{dd4}'),
('\u{dd6}', '\u{dd6}'),
@ -206,7 +208,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('ᩕ', '\u{1a5e}'),
('\u{1a60}', '\u{1a7c}'),
('\u{1a7f}', '\u{1a7f}'),
('\u{1ab0}', '\u{1abe}'),
('\u{1ab0}', '\u{1ac0}'),
('\u{1b00}', 'ᬄ'),
('\u{1b34}', '᭄'),
('\u{1b6b}', '\u{1b73}'),
@ -236,6 +238,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{a806}', '\u{a806}'),
('\u{a80b}', '\u{a80b}'),
('ꠣ', 'ꠧ'),
('\u{a82c}', '\u{a82c}'),
('ꢀ', 'ꢁ'),
('ꢴ', '\u{a8c5}'),
('\u{a8e0}', '\u{a8f1}'),
@ -272,6 +275,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{10a3f}', '\u{10a3f}'),
('\u{10ae5}', '\u{10ae6}'),
('\u{10d24}', '\u{10d27}'),
('\u{10eab}', '\u{10eac}'),
('\u{10f46}', '\u{10f50}'),
('𑀀', '𑀂'),
('\u{11038}', '\u{11046}'),
@ -284,6 +288,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{11180}', '𑆂'),
('𑆳', '𑇀'),
('\u{111c9}', '\u{111cc}'),
('\u{111ce}', '\u{111cf}'),
('𑈬', '\u{11237}'),
('\u{1123e}', '\u{1123e}'),
('\u{112df}', '\u{112ea}'),
@ -306,9 +311,14 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{116ab}', '\u{116b7}'),
('\u{1171d}', '\u{1172b}'),
('𑠬', '\u{1183a}'),
('\u{119d1}', '\u{119d7}'),
('\u{11930}', '\u{11935}'),
('\u{11937}', '\u{11938}'),
('\u{1193b}', '\u{1193e}'),
('\u{11940}', '\u{11940}'),
('\u{11942}', '\u{11943}'),
('𑧑', '\u{119d7}'),
('\u{119da}', '\u{119e0}'),
('\u{119e4}', '\u{119e4}'),
('𑧤', '𑧤'),
('\u{11a01}', '\u{11a0a}'),
('\u{11a33}', '𑨹'),
('\u{11a3b}', '\u{11a3e}'),
@ -331,8 +341,10 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{16af0}', '\u{16af4}'),
('\u{16b30}', '\u{16b36}'),
('\u{16f4f}', '\u{16f4f}'),
('𖽑', '\u{16f87}'),
('𖽑', '𖾇'),
('\u{16f8f}', '\u{16f92}'),
('\u{16fe4}', '\u{16fe4}'),
('\u{16ff0}', '\u{16ff1}'),
('\u{1bc9d}', '\u{1bc9e}'),
('\u{1d165}', '\u{1d169}'),
('𝅭', '\u{1d172}'),
@ -986,13 +998,16 @@ pub const LOWER: &'static [(char, char)] = &[
('ꞵ', 'ꞵ'),
('ꞷ', 'ꞷ'),
('ꞹ', 'ꞹ'),
('\u{a7bb}', '\u{a7bb}'),
('\u{a7bd}', '\u{a7bd}'),
('\u{a7bf}', '\u{a7bf}'),
('\u{a7c3}', '\u{a7c3}'),
('ꞻ', 'ꞻ'),
('ꞽ', 'ꞽ'),
('ꞿ', 'ꞿ'),
('ꟃ', 'ꟃ'),
('\u{a7c8}', '\u{a7c8}'),
('\u{a7ca}', '\u{a7ca}'),
('\u{a7f6}', '\u{a7f6}'),
('ꟸ', 'ꟺ'),
('ꬰ', ''),
('ꭜ', '\u{ab67}'),
('ꭜ', '\u{ab68}'),
('ꭰ', 'ꮿ'),
('ff', 'st'),
('ﬓ', 'ﬗ'),
@ -1085,15 +1100,17 @@ pub const NUMERIC: &'static [(char, char)] = &[
('𑛀', '𑛉'),
('𑜰', '𑜹'),
('𑣠', '𑣩'),
('\u{11950}', '\u{11959}'),
('𑱐', '𑱙'),
('𑵐', '𑵙'),
('𑶠', '𑶩'),
('𖩠', '𖩩'),
('𖭐', '𖭙'),
('𝟎', '𝟿'),
('\u{1e140}', '\u{1e149}'),
('\u{1e2f0}', '\u{1e2f9}'),
('𞅀', '𞅉'),
('𞋰', '𞋹'),
('𞥐', '𞥙'),
('\u{1fbf0}', '\u{1fbf9}'),
];
pub const OLETTER: &'static [(char, char)] = &[
@ -1130,7 +1147,7 @@ pub const OLETTER: &'static [(char, char)] = &[
('ࡀ', 'ࡘ'),
('ࡠ', 'ࡪ'),
('ࢠ', 'ࢴ'),
('ࢶ', ''),
('ࢶ', '\u{8c7}'),
('ऄ', 'ह'),
('ऽ', 'ऽ'),
('ॐ', 'ॐ'),
@ -1206,7 +1223,7 @@ pub const OLETTER: &'static [(char, char)] = &[
('ೞ', 'ೞ'),
('ೠ', 'ೡ'),
('ೱ', 'ೲ'),
('', 'ഌ'),
('\u{d04}', 'ഌ'),
('എ', 'ഐ'),
('ഒ', 'ഺ'),
('ഽ', 'ഽ'),
@ -1224,8 +1241,8 @@ pub const OLETTER: &'static [(char, char)] = &[
('เ', 'ๆ'),
('ກ', 'ຂ'),
('ຄ', 'ຄ'),
('\u{e86}', 'ຊ'),
('\u{e8c}', 'ຣ'),
('', 'ຊ'),
('', 'ຣ'),
('ລ', 'ລ'),
('ວ', 'ະ'),
('າ', 'ຳ'),
@ -1304,7 +1321,7 @@ pub const OLETTER: &'static [(char, char)] = &[
('ᳩ', 'ᳬ'),
('ᳮ', 'ᳳ'),
('ᳵ', 'ᳶ'),
('\u{1cfa}', '\u{1cfa}'),
('ᳺ', 'ᳺ'),
('ℵ', 'ℸ'),
('ↀ', 'ↂ'),
('ↅ', 'ↈ'),
@ -1330,10 +1347,10 @@ pub const OLETTER: &'static [(char, char)] = &[
('ー', 'ヿ'),
('ㄅ', 'ㄯ'),
('ㄱ', 'ㆎ'),
('ㆠ', ''),
('ㆠ', '\u{31bf}'),
('ㇰ', 'ㇿ'),
('㐀', ''),
('一', ''),
('㐀', '\u{4dbf}'),
('一', '\u{9ffc}'),
('ꀀ', 'ꒌ'),
('', ''),
('ꔀ', 'ꘌ'),
@ -1382,6 +1399,7 @@ pub const OLETTER: &'static [(char, char)] = &[
('ꬑ', 'ꬖ'),
('ꬠ', 'ꬦ'),
('ꬨ', 'ꬮ'),
('\u{ab69}', '\u{ab69}'),
('ꯀ', 'ꯢ'),
('가', '힣'),
('ힰ', 'ퟆ'),
@ -1459,15 +1477,19 @@ pub const OLETTER: &'static [(char, char)] = &[
('𐮀', '𐮑'),
('𐰀', '𐱈'),
('𐴀', '𐴣'),
('\u{10e80}', '\u{10ea9}'),
('\u{10eb0}', '\u{10eb1}'),
('𐼀', '𐼜'),
('𐼧', '𐼧'),
('𐼰', '𐽅'),
('\u{10fe0}', '\u{10ff6}'),
('\u{10fb0}', '\u{10fc4}'),
('𐿠', '𐿶'),
('𑀃', '𑀷'),
('𑂃', '𑂯'),
('𑃐', '𑃨'),
('𑄃', '𑄦'),
('𑅄', '𑅄'),
('\u{11147}', '\u{11147}'),
('𑅐', '𑅲'),
('𑅶', '𑅶'),
('𑆃', '𑆲'),
@ -1493,7 +1515,7 @@ pub const OLETTER: &'static [(char, char)] = &[
('𑍝', '𑍡'),
('𑐀', '𑐴'),
('𑑇', '𑑊'),
('\u{1145f}', '\u{1145f}'),
('𑑟', '\u{11461}'),
('𑒀', '𑒯'),
('𑓄', '𑓅'),
('𑓇', '𑓇'),
@ -1502,14 +1524,20 @@ pub const OLETTER: &'static [(char, char)] = &[
('𑘀', '𑘯'),
('𑙄', '𑙄'),
('𑚀', '𑚪'),
('\u{116b8}', '\u{116b8}'),
('𑚸', '𑚸'),
('𑜀', '𑜚'),
('𑠀', '𑠫'),
('𑣿', '𑣿'),
('\u{119a0}', '\u{119a7}'),
('\u{119aa}', '\u{119d0}'),
('\u{119e1}', '\u{119e1}'),
('\u{119e3}', '\u{119e3}'),
('𑣿', '\u{11906}'),
('\u{11909}', '\u{11909}'),
('\u{1190c}', '\u{11913}'),
('\u{11915}', '\u{11916}'),
('\u{11918}', '\u{1192f}'),
('\u{1193f}', '\u{1193f}'),
('\u{11941}', '\u{11941}'),
('𑦠', '𑦧'),
('𑦪', '𑧐'),
('𑧡', '𑧡'),
('𑧣', '𑧣'),
('𑨀', '𑨀'),
('𑨋', '𑨲'),
('𑨺', '𑨺'),
@ -1530,6 +1558,7 @@ pub const OLETTER: &'static [(char, char)] = &[
('𑵪', '𑶉'),
('𑶘', '𑶘'),
('𑻠', '𑻲'),
('\u{11fb0}', '\u{11fb0}'),
('𒀀', '𒎙'),
('𒐀', '𒑮'),
('𒒀', '𒕃'),
@ -1542,27 +1571,28 @@ pub const OLETTER: &'static [(char, char)] = &[
('𖭀', '𖭃'),
('𖭣', '𖭷'),
('𖭽', '𖮏'),
('𖼀', '\u{16f4a}'),
('𖼀', '𖽊'),
('𖽐', '𖽐'),
('𖾓', '𖾟'),
('𖿠', '𖿡'),
('\u{16fe3}', '\u{16fe3}'),
('𗀀', '\u{187f7}'),
('𘠀', '𘫲'),
('𖿣', '𖿣'),
('𗀀', '𘟷'),
('𘠀', '\u{18cd5}'),
('\u{18d00}', '\u{18d08}'),
('𛀀', '𛄞'),
('\u{1b150}', '\u{1b152}'),
('\u{1b164}', '\u{1b167}'),
('𛅐', '𛅒'),
('𛅤', '𛅧'),
('𛅰', '𛋻'),
('𛰀', '𛱪'),
('𛱰', '𛱼'),
('𛲀', '𛲈'),
('𛲐', '𛲙'),
('\u{1e100}', '\u{1e12c}'),
('\u{1e137}', '\u{1e13d}'),
('\u{1e14e}', '\u{1e14e}'),
('\u{1e2c0}', '\u{1e2eb}'),
('𞄀', '𞄬'),
('𞄷', '𞄽'),
('𞅎', '𞅎'),
('𞋀', '𞋫'),
('𞠀', '𞣄'),
('\u{1e94b}', '\u{1e94b}'),
('𞥋', '𞥋'),
('𞸀', '𞸃'),
('𞸅', '𞸟'),
('𞸡', '𞸢'),
@ -1596,12 +1626,13 @@ pub const OLETTER: &'static [(char, char)] = &[
('𞺡', '𞺣'),
('𞺥', '𞺩'),
('𞺫', '𞺻'),
('𠀀', '𪛖'),
('𠀀', '\u{2a6dd}'),
('𪜀', '𫜴'),
('𫝀', '𫠝'),
('𫠠', '𬺡'),
('𬺰', '𮯠'),
('丽', '𪘀'),
('\u{30000}', '\u{3134a}'),
];
pub const SCONTINUE: &'static [(char, char)] = &[
@ -1687,6 +1718,8 @@ pub const STERM: &'static [(char, char)] = &[
('𑗉', '𑗗'),
('𑙁', '𑙂'),
('𑜼', '𑜾'),
('\u{11944}', '\u{11944}'),
('\u{11946}', '\u{11946}'),
('𑩂', '𑩃'),
('𑪛', '𑪜'),
('𑱁', '𑱂'),
@ -2312,11 +2345,13 @@ pub const UPPER: &'static [(char, char)] = &[
('Ʞ', ''),
('Ꞷ', 'Ꞷ'),
('Ꞹ', 'Ꞹ'),
('\u{a7ba}', '\u{a7ba}'),
('\u{a7bc}', '\u{a7bc}'),
('\u{a7be}', '\u{a7be}'),
('\u{a7c2}', '\u{a7c2}'),
('\u{a7c4}', '\u{a7c6}'),
('Ꞻ', 'Ꞻ'),
('Ꞽ', 'Ꞽ'),
('Ꞿ', 'Ꞿ'),
('Ꟃ', 'Ꟃ'),
('Ꞔ', '\u{a7c7}'),
('\u{a7c9}', '\u{a7c9}'),
('\u{a7f5}', '\u{a7f5}'),
('', ''),
('𐐀', '𐐧'),
('𐒰', '𐓓'),


@ -1,8 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate word-break /tmp/ucd/12.1.0/ --chars
// ucd-generate word-break ucd-13.0.0 --chars
//
// ucd-generate is available on crates.io.
// Unicode version: 13.0.0.
//
// ucd-generate 0.2.8 is available on crates.io.
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("ALetter", ALETTER),
@ -34,8 +36,7 @@ pub const ALETTER: &'static [(char, char)] = &[
('À', 'Ö'),
('Ø', 'ö'),
('ø', '˗'),
('˞', 'ˤ'),
('ˬ', '˿'),
('˞', '˿'),
('Ͱ', 'ʹ'),
('Ͷ', 'ͷ'),
('ͺ', 'ͽ'),
@ -48,10 +49,10 @@ pub const ALETTER: &'static [(char, char)] = &[
('Ϸ', 'ҁ'),
('Ҋ', 'ԯ'),
('Ա', 'Ֆ'),
('ՙ', 'ՙ'),
('՛', '՜'),
('ՙ', '՜'),
('՞', '՞'),
('ՠ', 'ֈ'),
('֊', '֊'),
('׳', '׳'),
('ؠ', 'ي'),
('ٮ', 'ٯ'),
@ -75,7 +76,7 @@ pub const ALETTER: &'static [(char, char)] = &[
('ࡀ', 'ࡘ'),
('ࡠ', 'ࡪ'),
('ࢠ', 'ࢴ'),
('ࢶ', ''),
('ࢶ', '\u{8c7}'),
('ऄ', 'ह'),
('ऽ', 'ऽ'),
('ॐ', 'ॐ'),
@ -151,7 +152,7 @@ pub const ALETTER: &'static [(char, char)] = &[
('ೞ', 'ೞ'),
('ೠ', 'ೡ'),
('ೱ', 'ೲ'),
('', 'ഌ'),
('\u{d04}', 'ഌ'),
('എ', 'ഐ'),
('ഒ', 'ഺ'),
('ഽ', 'ഽ'),
@ -223,7 +224,7 @@ pub const ALETTER: &'static [(char, char)] = &[
('ᳩ', 'ᳬ'),
('ᳮ', 'ᳳ'),
('ᳵ', 'ᳶ'),
('\u{1cfa}', '\u{1cfa}'),
('ᳺ', 'ᳺ'),
('ᴀ', 'ᶿ'),
('Ḁ', 'ἕ'),
('Ἐ', 'Ἕ'),
@ -286,7 +287,7 @@ pub const ALETTER: &'static [(char, char)] = &[
('〻', '〼'),
('ㄅ', 'ㄯ'),
('ㄱ', 'ㆎ'),
('ㆠ', ''),
('ㆠ', '\u{31bf}'),
('ꀀ', 'ꒌ'),
('', ''),
('ꔀ', 'ꘌ'),
@ -295,9 +296,9 @@ pub const ALETTER: &'static [(char, char)] = &[
('Ꙁ', 'ꙮ'),
('ꙿ', 'ꚝ'),
('ꚠ', ''),
('ꜗ', '\u{a7bf}'),
('\u{a7c2}', '\u{a7c6}'),
('', 'ꠁ'),
('꜈', 'ꞿ'),
('Ꟃ', '\u{a7ca}'),
('\u{a7f5}', 'ꠁ'),
('ꠃ', 'ꠅ'),
('ꠇ', 'ꠊ'),
('ꠌ', 'ꠢ'),
@ -321,7 +322,7 @@ pub const ALETTER: &'static [(char, char)] = &[
('ꬑ', 'ꬖ'),
('ꬠ', 'ꬦ'),
('ꬨ', 'ꬮ'),
('ꬰ', '\u{ab67}'),
('ꬰ', '\u{ab69}'),
('ꭰ', 'ꯢ'),
('가', '힣'),
('ힰ', 'ퟆ'),
@ -397,15 +398,19 @@ pub const ALETTER: &'static [(char, char)] = &[
('𐲀', '𐲲'),
('𐳀', '𐳲'),
('𐴀', '𐴣'),
('\u{10e80}', '\u{10ea9}'),
('\u{10eb0}', '\u{10eb1}'),
('𐼀', '𐼜'),
('𐼧', '𐼧'),
('𐼰', '𐽅'),
('\u{10fe0}', '\u{10ff6}'),
('\u{10fb0}', '\u{10fc4}'),
('𐿠', '𐿶'),
('𑀃', '𑀷'),
('𑂃', '𑂯'),
('𑃐', '𑃨'),
('𑄃', '𑄦'),
('𑅄', '𑅄'),
('\u{11147}', '\u{11147}'),
('𑅐', '𑅲'),
('𑅶', '𑅶'),
('𑆃', '𑆲'),
@ -431,7 +436,7 @@ pub const ALETTER: &'static [(char, char)] = &[
('𑍝', '𑍡'),
('𑐀', '𑐴'),
('𑑇', '𑑊'),
('\u{1145f}', '\u{1145f}'),
('𑑟', '\u{11461}'),
('𑒀', '𑒯'),
('𑓄', '𑓅'),
('𑓇', '𑓇'),
@ -440,14 +445,20 @@ pub const ALETTER: &'static [(char, char)] = &[
('𑘀', '𑘯'),
('𑙄', '𑙄'),
('𑚀', '𑚪'),
('\u{116b8}', '\u{116b8}'),
('𑚸', '𑚸'),
('𑠀', '𑠫'),
('𑢠', '𑣟'),
('𑣿', '𑣿'),
('\u{119a0}', '\u{119a7}'),
('\u{119aa}', '\u{119d0}'),
('\u{119e1}', '\u{119e1}'),
('\u{119e3}', '\u{119e3}'),
('𑣿', '\u{11906}'),
('\u{11909}', '\u{11909}'),
('\u{1190c}', '\u{11913}'),
('\u{11915}', '\u{11916}'),
('\u{11918}', '\u{1192f}'),
('\u{1193f}', '\u{1193f}'),
('\u{11941}', '\u{11941}'),
('𑦠', '𑦧'),
('𑦪', '𑧐'),
('𑧡', '𑧡'),
('𑧣', '𑧣'),
('𑨀', '𑨀'),
('𑨋', '𑨲'),
('𑨺', '𑨺'),
@ -468,6 +479,7 @@ pub const ALETTER: &'static [(char, char)] = &[
('𑵪', '𑶉'),
('𑶘', '𑶘'),
('𑻠', '𑻲'),
('\u{11fb0}', '\u{11fb0}'),
('𒀀', '𒎙'),
('𒐀', '𒑮'),
('𒒀', '𒕃'),
@ -481,11 +493,11 @@ pub const ALETTER: &'static [(char, char)] = &[
('𖭣', '𖭷'),
('𖭽', '𖮏'),
('𖹀', '𖹿'),
('𖼀', '\u{16f4a}'),
('𖼀', '𖽊'),
('𖽐', '𖽐'),
('𖾓', '𖾟'),
('𖿠', '𖿡'),
('\u{16fe3}', '\u{16fe3}'),
('𖿣', '𖿣'),
('𛰀', '𛱪'),
('𛱰', '𛱼'),
('𛲀', '𛲈'),
@ -520,13 +532,13 @@ pub const ALETTER: &'static [(char, char)] = &[
('𝞊', '𝞨'),
('𝞪', '𝟂'),
('𝟄', '𝟋'),
('\u{1e100}', '\u{1e12c}'),
('\u{1e137}', '\u{1e13d}'),
('\u{1e14e}', '\u{1e14e}'),
('\u{1e2c0}', '\u{1e2eb}'),
('𞄀', '𞄬'),
('𞄷', '𞄽'),
('𞅎', '𞅎'),
('𞋀', '𞋫'),
('𞠀', '𞣄'),
('𞤀', '𞥃'),
('\u{1e94b}', '\u{1e94b}'),
('𞥋', '𞥋'),
('𞸀', '𞸃'),
('𞸅', '𞸟'),
('𞸡', '𞸢'),
@ -628,7 +640,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{b3e}', '\u{b44}'),
('େ', 'ୈ'),
('ୋ', '\u{b4d}'),
('\u{b56}', '\u{b57}'),
('\u{b55}', '\u{b57}'),
('\u{b62}', '\u{b63}'),
('\u{b82}', '\u{b82}'),
('\u{bbe}', 'ூ'),
@ -655,7 +667,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('ൊ', '\u{d4d}'),
('\u{d57}', '\u{d57}'),
('\u{d62}', '\u{d63}'),
('', 'ඃ'),
('\u{d81}', 'ඃ'),
('\u{dca}', '\u{dca}'),
('\u{dcf}', '\u{dd4}'),
('\u{dd6}', '\u{dd6}'),
@ -702,7 +714,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('ᩕ', '\u{1a5e}'),
('\u{1a60}', '\u{1a7c}'),
('\u{1a7f}', '\u{1a7f}'),
('\u{1ab0}', '\u{1abe}'),
('\u{1ab0}', '\u{1ac0}'),
('\u{1b00}', 'ᬄ'),
('\u{1b34}', '᭄'),
('\u{1b6b}', '\u{1b73}'),
@ -732,6 +744,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{a806}', '\u{a806}'),
('\u{a80b}', '\u{a80b}'),
('ꠣ', 'ꠧ'),
('\u{a82c}', '\u{a82c}'),
('ꢀ', 'ꢁ'),
('ꢴ', '\u{a8c5}'),
('\u{a8e0}', '\u{a8f1}'),
@ -768,6 +781,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{10a3f}', '\u{10a3f}'),
('\u{10ae5}', '\u{10ae6}'),
('\u{10d24}', '\u{10d27}'),
('\u{10eab}', '\u{10eac}'),
('\u{10f46}', '\u{10f50}'),
('𑀀', '𑀂'),
('\u{11038}', '\u{11046}'),
@ -780,6 +794,7 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{11180}', '𑆂'),
('𑆳', '𑇀'),
('\u{111c9}', '\u{111cc}'),
('\u{111ce}', '\u{111cf}'),
('𑈬', '\u{11237}'),
('\u{1123e}', '\u{1123e}'),
('\u{112df}', '\u{112ea}'),
@ -802,9 +817,14 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{116ab}', '\u{116b7}'),
('\u{1171d}', '\u{1172b}'),
('𑠬', '\u{1183a}'),
('\u{119d1}', '\u{119d7}'),
('\u{11930}', '\u{11935}'),
('\u{11937}', '\u{11938}'),
('\u{1193b}', '\u{1193e}'),
('\u{11940}', '\u{11940}'),
('\u{11942}', '\u{11943}'),
('𑧑', '\u{119d7}'),
('\u{119da}', '\u{119e0}'),
('\u{119e4}', '\u{119e4}'),
('𑧤', '𑧤'),
('\u{11a01}', '\u{11a0a}'),
('\u{11a33}', '𑨹'),
('\u{11a3b}', '\u{11a3e}'),
@ -827,8 +847,10 @@ pub const EXTEND: &'static [(char, char)] = &[
('\u{16af0}', '\u{16af4}'),
('\u{16b30}', '\u{16b36}'),
('\u{16f4f}', '\u{16f4f}'),
('𖽑', '\u{16f87}'),
('𖽑', '𖾇'),
('\u{16f8f}', '\u{16f92}'),
('\u{16fe4}', '\u{16fe4}'),
('\u{16ff0}', '\u{16ff1}'),
('\u{1bc9d}', '\u{1bc9e}'),
('\u{1d165}', '\u{1d169}'),
('𝅭', '\u{1d172}'),
@ -911,7 +933,7 @@ pub const KATAKANA: &'static [(char, char)] = &[
('㌀', '㍗'),
('ヲ', 'ン'),
('𛀀', '𛀀'),
('\u{1b164}', '\u{1b167}'),
('𛅤', '𛅧'),
];
pub const LF: &'static [(char, char)] = &[('\n', '\n')];
@ -920,6 +942,7 @@ pub const MIDLETTER: &'static [(char, char)] = &[
(':', ':'),
('·', '·'),
('·', '·'),
('՟', '՟'),
('״', '״'),
('‧', '‧'),
('︓', '︓'),
@ -1008,15 +1031,17 @@ pub const NUMERIC: &'static [(char, char)] = &[
('𑛀', '𑛉'),
('𑜰', '𑜹'),
('𑣠', '𑣩'),
('\u{11950}', '\u{11959}'),
('𑱐', '𑱙'),
('𑵐', '𑵙'),
('𑶠', '𑶩'),
('𖩠', '𖩩'),
('𖭐', '𖭙'),
('𝟎', '𝟿'),
('\u{1e140}', '\u{1e149}'),
('\u{1e2f0}', '\u{1e2f9}'),
('𞅀', '𞅉'),
('𞋰', '𞋹'),
('𞥐', '𞥙'),
('\u{1fbf0}', '\u{1fbf9}'),
];
pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')];

88 third_party/rust/regex-syntax/src/utf8.rs (vendored)

@ -15,7 +15,7 @@ whether a particular byte sequence was a Cyrillic character. One possible
scalar value range is `[0400-04FF]`. The set of allowed bytes for this
range can be expressed as a sequence of byte ranges:
```ignore
```text
[D0-D3][80-BF]
```
@ -32,7 +32,7 @@ for example, `04FF` (because its last byte, `BF` isn't in the range `80-AF`).
Instead, you need multiple sequences of byte ranges:
```ignore
```text
[D0-D3][80-BF] # matches codepoints 0400-04FF
[D4][80-AF] # matches codepoints 0500-052F
```
@ -41,7 +41,7 @@ This gets even more complicated if you want bigger ranges, particularly if
they naively contain surrogate codepoints. For example, the sequence of byte
ranges for the basic multilingual plane (`[0000-FFFF]`) look like this:
```ignore
```text
[0-7F]
[C2-DF][80-BF]
[E0][A0-BF][80-BF]
@ -55,7 +55,7 @@ UTF-8, including encodings of surrogate codepoints.
And, of course, for all of Unicode (`[000000-10FFFF]`):
```ignore
```text
[0-7F]
[C2-DF][80-BF]
[E0][A0-BF][80-BF]
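The doc comment above describes how a scalar-value range decomposes into sequences of byte ranges. The crate exposes that decomposition through `Utf8Sequences`; a minimal sketch, assuming only the public API shown elsewhere in this file:

```rust
use regex_syntax::utf8::Utf8Sequences;

fn main() {
    // Enumerate the UTF-8 byte-range sequences covering U+0400..=U+04FF,
    // the Cyrillic example from the doc comment above.
    for seq in Utf8Sequences::new('\u{400}', '\u{4ff}') {
        // The Debug output of a sequence looks like: [D0-D3][80-BF]
        println!("{:?}", seq);
    }
}
```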
@ -84,6 +84,7 @@ which uses it for executing automata on their term index.
use std::char;
use std::fmt;
use std::iter::FusedIterator;
use std::slice;
const MAX_UTF8_BYTES: usize = 4;
@ -152,6 +153,31 @@ impl Utf8Sequence {
self.as_slice().len()
}
/// Reverses the ranges in this sequence.
///
/// For example, if this corresponds to the following sequence:
///
/// ```text
/// [D0-D3][80-BF]
/// ```
///
/// Then after reversal, it will be
///
/// ```text
/// [80-BF][D0-D3]
/// ```
///
/// This is useful when one is constructing a UTF-8 automaton to match
/// character classes in reverse.
pub fn reverse(&mut self) {
match *self {
Utf8Sequence::One(_) => {}
Utf8Sequence::Two(ref mut x) => x.reverse(),
Utf8Sequence::Three(ref mut x) => x.reverse(),
Utf8Sequence::Four(ref mut x) => x.reverse(),
}
}
/// Returns true if and only if a prefix of `bytes` matches this sequence
/// of byte ranges.
pub fn matches(&self, bytes: &[u8]) -> bool {
@ -177,7 +203,7 @@ impl<'a> IntoIterator for &'a Utf8Sequence {
}
impl fmt::Debug for Utf8Sequence {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use self::Utf8Sequence::*;
match *self {
One(ref r) => write!(f, "{:?}", r),
@ -201,7 +227,7 @@ pub struct Utf8Range {
impl Utf8Range {
fn new(start: u8, end: u8) -> Self {
Utf8Range { start: start, end: end }
Utf8Range { start, end }
}
/// Returns true if and only if the given byte is in this range.
@ -211,7 +237,7 @@ impl Utf8Range {
}
impl fmt::Debug for Utf8Range {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.start == self.end {
write!(f, "[{:X}]", self.start)
} else {
@ -270,6 +296,7 @@ impl fmt::Debug for Utf8Range {
/// illustrative. In practice, you could just try to decode your byte sequence
/// and compare it with the scalar value range directly. However, this is not
/// always possible (for example, in a byte based automaton).
#[derive(Debug)]
pub struct Utf8Sequences {
range_stack: Vec<ScalarRange>,
}
@ -294,7 +321,7 @@ impl Utf8Sequences {
}
fn push(&mut self, start: u32, end: u32) {
self.range_stack.push(ScalarRange { start: start, end: end });
self.range_stack.push(ScalarRange { start, end });
}
}
@ -304,7 +331,7 @@ struct ScalarRange {
}
impl fmt::Debug for ScalarRange {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "ScalarRange({:X}, {:X})", self.start, self.end)
}
}
@ -363,6 +390,8 @@ impl Iterator for Utf8Sequences {
}
}
impl FusedIterator for Utf8Sequences {}
impl ScalarRange {
/// split splits this range if it overlaps with a surrogate codepoint.
///
@ -428,7 +457,7 @@ fn max_scalar_value(nbytes: usize) -> u32 {
mod tests {
use std::char;
use utf8::{Utf8Range, Utf8Sequences};
use crate::utf8::{Utf8Range, Utf8Sequences};
fn rutf8(s: u8, e: u8) -> Utf8Range {
Utf8Range::new(s, e)
@ -475,7 +504,7 @@ mod tests {
#[test]
fn bmp() {
use utf8::Utf8Sequence::*;
use crate::utf8::Utf8Sequence::*;
let seqs = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect::<Vec<_>>();
assert_eq!(
@ -507,6 +536,43 @@ mod tests {
);
}
#[test]
fn reverse() {
use crate::utf8::Utf8Sequence::*;
let mut s = One(rutf8(0xA, 0xB));
s.reverse();
assert_eq!(s.as_slice(), &[rutf8(0xA, 0xB)]);
let mut s = Two([rutf8(0xA, 0xB), rutf8(0xB, 0xC)]);
s.reverse();
assert_eq!(s.as_slice(), &[rutf8(0xB, 0xC), rutf8(0xA, 0xB)]);
let mut s = Three([rutf8(0xA, 0xB), rutf8(0xB, 0xC), rutf8(0xC, 0xD)]);
s.reverse();
assert_eq!(
s.as_slice(),
&[rutf8(0xC, 0xD), rutf8(0xB, 0xC), rutf8(0xA, 0xB)]
);
let mut s = Four([
rutf8(0xA, 0xB),
rutf8(0xB, 0xC),
rutf8(0xC, 0xD),
rutf8(0xD, 0xE),
]);
s.reverse();
assert_eq!(
s.as_slice(),
&[
rutf8(0xD, 0xE),
rutf8(0xC, 0xD),
rutf8(0xB, 0xC),
rutf8(0xA, 0xB)
]
);
}
fn encode_surrogate(cp: u32) -> [u8; 3] {
const TAG_CONT: u8 = 0b1000_0000;
const TAG_THREE_B: u8 = 0b1110_0000;

2 third_party/rust/regex-syntax/test (vendored)

@ -1,5 +1,7 @@
#!/bin/bash
set -e
# This is a convenience script for running a broad swath of the syntax tests.
echo "===== DEFAULT FEATURES ==="
cargo test

2 third_party/rust/regex/.cargo-checksum.json (vendored)

Diff suppressed because one or more lines are too long

232 third_party/rust/regex/CHANGELOG.md (vendored)

@ -1,3 +1,235 @@
1.5.4 (2021-05-06)
==================
This release fixes another compilation failure when building regex. This time,
the fix is for when the `pattern` feature is enabled, which only works on
nightly Rust. CI has been updated to test this case.
* [BUG #772](https://github.com/rust-lang/regex/pull/772):
Fix build when `pattern` feature is enabled.
1.5.3 (2021-05-01)
==================
This release fixes a bug when building regex with only the `unicode-perl`
feature. It turns out that while CI was building this configuration, it wasn't
actually failing the overall build on a failed compilation.
* [BUG #769](https://github.com/rust-lang/regex/issues/769):
Fix build in `regex-syntax` when only the `unicode-perl` feature is enabled.
1.5.2 (2021-05-01)
==================
This release fixes a performance bug when Unicode word boundaries are used.
Namely, for certain regexes on certain inputs, it's possible for the lazy DFA
to stop searching (causing a fallback to a slower engine) when it doesn't
actually need to.
[PR #768](https://github.com/rust-lang/regex/pull/768) fixes the bug, which was
originally reported in
[ripgrep#1860](https://github.com/BurntSushi/ripgrep/issues/1860).
1.5.1 (2021-04-30)
==================
This is a patch release that fixes a compilation error when the `perf-literal`
feature is not enabled.
1.5.0 (2021-04-30)
==================
This release primarily updates to Rust 2018 (finally) and bumps the MSRV to
Rust 1.41 (from Rust 1.28). Rust 1.41 was chosen because it's still reasonably
old, and is what's in Debian stable at the time of writing.
This release also drops this crate's own bespoke substring search algorithms
in favor of a new
[`memmem` implementation provided by the `memchr` crate](https://docs.rs/memchr/2.4.0/memchr/memmem/index.html).
This will change the performance profile of some regexes, sometimes getting a
little worse, and hopefully more frequently, getting a lot better. Please
report any serious performance regressions if you find them.
1.4.6 (2021-04-22)
==================
This is a small patch release that fixes the compiler's size check on how much
heap memory a regex uses. Previously, the compiler did not account for the
heap usage of Unicode character classes. Now it does. It's possible that this
may make some regexes fail to compile that previously did compile. If that
happens, please file an issue.
* [BUG OSS-fuzz#33579](https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579):
Some regexes can use more heap memory than one would expect.
1.4.5 (2021-03-14)
==================
This is a small patch release that fixes a regression in the size of a `Regex`
in the 1.4.4 release. Prior to 1.4.4, a `Regex` was 552 bytes. In the 1.4.4
release, it was 856 bytes due to internal changes. In this release, a `Regex`
is now 16 bytes. In general, the size of a `Regex` was never something that was
on my radar, but this increased size in the 1.4.4 release seems to have crossed
a threshold and resulted in stack overflows in some programs.
* [BUG #750](https://github.com/rust-lang/regex/pull/750):
Fixes stack overflows seemingly caused by a large `Regex` size by decreasing
its size.
1.4.4 (2021-03-11)
==================
This is a small patch release that contains some bug fixes. Notably, it also
drops the `thread_local` (and `lazy_static`, via transitivity) dependencies.
Bug fixes:
* [BUG #362](https://github.com/rust-lang/regex/pull/362):
Memory leaks caused by an internal caching strategy should now be fixed.
* [BUG #576](https://github.com/rust-lang/regex/pull/576):
All regex types now implement `UnwindSafe` and `RefUnwindSafe`.
* [BUG #728](https://github.com/rust-lang/regex/pull/749):
Add missing `Replacer` impls for `Vec<u8>`, `String`, `Cow`, etc.
1.4.3 (2021-01-08)
==================
This is a small patch release that adds some missing standard trait
implementations for some types in the public API.
Bug fixes:
* [BUG #734](https://github.com/rust-lang/regex/pull/734):
Add `FusedIterator` and `ExactSizeIterator` impls to iterator types.
* [BUG #735](https://github.com/rust-lang/regex/pull/735):
Add missing `Debug` impls to public API types.
1.4.2 (2020-11-01)
==================
This is a small bug fix release that bans `\P{any}`. We previously banned empty
classes like `[^\w\W]`, but missed the `\P{any}` case. In the future, we hope
to permit empty classes.
* [BUG #722](https://github.com/rust-lang/regex/issues/722):
Ban `\P{any}` to avoid a panic in the regex compiler. Found by OSS-Fuzz.
1.4.1 (2020-10-13)
==================
This is a small bug fix release that makes `\p{cf}` work. Previously, it would
report "property not found" even though `cf` is a valid abbreviation for the
`Format` general category.
* [BUG #719](https://github.com/rust-lang/regex/issues/719):
Fixes bug that prevented `\p{cf}` from working.
1.4.0 (2020-10-11)
==================
This release has a few minor documentation fixes as well as some very minor
API additions. The MSRV remains at Rust 1.28 for now, but this is intended to
increase to at least Rust 1.41.1 soon.
This release also adds support for OSS-Fuzz. Kudos to
[@DavidKorczynski](https://github.com/DavidKorczynski)
for doing the heavy lifting for that!
New features:
* [FEATURE #649](https://github.com/rust-lang/regex/issues/649):
Support `[`, `]` and `.` in capture group names.
* [FEATURE #687](https://github.com/rust-lang/regex/issues/687):
Add `is_empty` predicate to `RegexSet`.
* [FEATURE #689](https://github.com/rust-lang/regex/issues/689):
Implement `Clone` for `SubCaptureMatches`.
* [FEATURE #715](https://github.com/rust-lang/regex/issues/715):
Add `empty` constructor to `RegexSet` for convenience.
Bug fixes:
* [BUG #694](https://github.com/rust-lang/regex/issues/694):
Fix doc example for `Replacer::replace_append`.
* [BUG #698](https://github.com/rust-lang/regex/issues/698):
Clarify docs for `s` flag when using a `bytes::Regex`.
* [BUG #711](https://github.com/rust-lang/regex/issues/711):
Clarify `is_match` docs to indicate that it can match anywhere in string.
1.3.9 (2020-05-28)
==================
This release fixes a MSRV (Minimum Support Rust Version) regression in the
1.3.8 release. Namely, while 1.3.8 compiles on Rust 1.28, it actually does not
compile on other Rust versions, such as Rust 1.39.
Bug fixes:
* [BUG #685](https://github.com/rust-lang/regex/issues/685):
Remove use of `doc_comment` crate, which cannot be used before Rust 1.43.
1.3.8 (2020-05-28)
==================
This release contains a couple of important bug fixes driven
by better support for empty-subexpressions in regexes. For
example, regexes like `b|` are now allowed. Major thanks to
[@sliquister](https://github.com/sliquister) for implementing support for this
in [#677](https://github.com/rust-lang/regex/pull/677).
Bug fixes:
* [BUG #523](https://github.com/rust-lang/regex/pull/523):
Add note to documentation that spaces can be escaped in `x` mode.
* [BUG #524](https://github.com/rust-lang/regex/issues/524):
Add support for empty sub-expressions, including empty alternations.
* [BUG #659](https://github.com/rust-lang/regex/issues/659):
Fix match bug caused by an empty sub-expression miscompilation.
1.3.7 (2020-04-17)
==================
This release contains a small bug fix that fixes how `regex` forwards crate
features to `regex-syntax`. In particular, this will reduce recompilations in
some cases.
Bug fixes:
* [BUG #665](https://github.com/rust-lang/regex/pull/665):
Fix feature forwarding to `regex-syntax`.
1.3.6 (2020-03-24)
==================
This release contains a sizable (~30%) performance improvement when compiling
some kinds of large regular expressions.
Performance improvements:
* [PERF #657](https://github.com/rust-lang/regex/pull/657):
Improved performance of compiling large regular expressions.
1.3.5 (2020-03-12)
==================
This release updates this crate to Unicode 13.
New features:
* [FEATURE #653](https://github.com/rust-lang/regex/pull/653):
Update `regex-syntax` to Unicode 13.
1.3.4 (2020-01-30)
==================
This is a small bug fix release that fixes a bug related to the scoping of
flags in a regex. Namely, before this fix, a regex like `((?i)a)b)` would
match `aB` despite the fact that `b` should not be matched case insensitively.
Bug fixes:
* [BUG #640](https://github.com/rust-lang/regex/issues/640):
Fix bug related to the scoping of flags in a regex.
1.3.3 (2020-01-09)
==================
This is a small maintenance release that upgrades the dependency on

201 third_party/rust/regex/Cargo.lock (generated, vendored)

@ -1,47 +1,33 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "0.7.6"
version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
dependencies = [
"memchr",
]
[[package]]
name = "autocfg"
version = "0.1.7"
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "bitflags"
version = "1.2.1"
name = "getrandom"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8"
dependencies = [
"bitflags",
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "doc-comment"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97"
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -50,188 +36,63 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.66"
version = "0.2.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558"
checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
[[package]]
name = "memchr"
version = "2.2.1"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
[[package]]
name = "quickcheck"
version = "0.8.5"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c35d9c36a562f37eca96e79f66d5fd56eefbc22560dacc4a864cabd2d277456"
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
dependencies = [
"rand",
"rand_core 0.4.2",
]
[[package]]
name = "rand"
version = "0.6.5"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e"
dependencies = [
"autocfg",
"libc",
"rand_chacha",
"rand_core 0.4.2",
"rand_hc",
"rand_isaac",
"rand_jitter",
"rand_os",
"rand_pcg",
"rand_xorshift",
"winapi",
]
[[package]]
name = "rand_chacha"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
dependencies = [
"autocfg",
"rand_core 0.3.1",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.3.1"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7"
dependencies = [
"rand_core 0.4.2",
]
[[package]]
name = "rand_core"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
[[package]]
name = "rand_hc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "rand_isaac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "rand_jitter"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b"
dependencies = [
"libc",
"rand_core 0.4.2",
"winapi",
]
[[package]]
name = "rand_os"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
dependencies = [
"cloudabi",
"fuchsia-cprng",
"libc",
"rand_core 0.4.2",
"rdrand",
"winapi",
]
[[package]]
name = "rand_pcg"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
dependencies = [
"autocfg",
"rand_core 0.4.2",
]
[[package]]
name = "rand_xorshift"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
dependencies = [
"rand_core 0.3.1",
"getrandom",
]
[[package]]
name = "regex"
version = "1.3.3"
version = "1.5.4"
dependencies = [
"aho-corasick",
"doc-comment",
"lazy_static",
"memchr",
"quickcheck",
"rand",
"regex-syntax",
"thread_local",
]
[[package]]
name = "regex-syntax"
version = "0.6.13"
version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e734e891f5b408a29efbf8309e656876276f49ab6a6ac208600b4419bd893d90"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "thread_local"
version = "1.0.0"
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88ddf1ad580c7e3d1efff877d972bcc93f995556b9087a5a259630985c88ceab"
dependencies = [
"lazy_static",
]
[[package]]
name = "winapi"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"

35 third_party/rust/regex/Cargo.toml (vendored)

@ -11,10 +11,11 @@
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "regex"
version = "1.3.3"
version = "1.5.4"
authors = ["The Rust Project Developers"]
exclude = ["/.travis.yml", "/appveyor.yml", "/ci/*", "/scripts/*"]
exclude = ["/scripts/*", "/.github/*"]
autotests = false
description = "An implementation of regular expressions for Rust. This implementation uses\nfinite automata and guarantees linear time matching on all inputs.\n"
homepage = "https://github.com/rust-lang/regex"
@ -72,43 +73,38 @@ path = "tests/test_backtrack_bytes.rs"
name = "crates-regex"
path = "tests/test_crates_regex.rs"
[dependencies.aho-corasick]
version = "0.7.6"
version = "0.7.18"
optional = true
[dependencies.memchr]
version = "2.2.1"
version = "2.4.0"
optional = true
[dependencies.regex-syntax]
version = "0.6.12"
version = "0.6.25"
default-features = false
[dependencies.thread_local]
version = "1"
optional = true
[dev-dependencies.doc-comment]
version = "0.3"
[dev-dependencies.lazy_static]
version = "1"
[dev-dependencies.quickcheck]
version = "0.8"
version = "1.0.3"
default-features = false
[dev-dependencies.rand]
version = "0.6.5"
version = "0.8.3"
features = ["getrandom", "small_rng"]
default-features = false
[features]
default = ["std", "perf", "unicode"]
default = ["std", "perf", "unicode", "regex-syntax/default"]
pattern = []
perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"]
perf-cache = ["thread_local"]
perf-cache = []
perf-dfa = []
perf-inline = []
perf-literal = ["aho-corasick", "memchr"]
std = []
unicode = ["unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"]
unicode = ["unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment", "regex-syntax/unicode"]
unicode-age = ["regex-syntax/unicode-age"]
unicode-bool = ["regex-syntax/unicode-bool"]
unicode-case = ["regex-syntax/unicode-case"]
@ -118,8 +114,3 @@ unicode-script = ["regex-syntax/unicode-script"]
unicode-segment = ["regex-syntax/unicode-segment"]
unstable = ["pattern"]
use_std = ["std"]
[badges.appveyor]
repository = "rust-lang-libs/regex"
[badges.travis-ci]
repository = "rust-lang/regex"

10 third_party/rust/regex/PERFORMANCE.md (vendored)

@ -62,9 +62,7 @@ on how your program is structured. Thankfully, the
[`lazy_static`](https://crates.io/crates/lazy_static)
crate provides an answer that works well:
#[macro_use] extern crate lazy_static;
extern crate regex;
use lazy_static::lazy_static;
use regex::Regex;
fn some_helper_function(text: &str) -> bool {
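A sketch of the compile-once pattern described above, with an illustrative pattern and helper body (the file's own example body is not shown in this hunk):

```rust
use lazy_static::lazy_static;
use regex::Regex;

fn some_helper_function(text: &str) -> bool {
    // Compile the regex once, on first use, instead of on every call.
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\d{4}-\d{2}-\d{2}").unwrap();
    }
    RE.is_match(text)
}

fn main() {
    assert!(some_helper_function("landed on 2021-06-15"));
}
```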
@ -147,9 +145,9 @@ In general, these are ordered from fastest to slowest.
`is_match` is fastest because it doesn't actually need to find the start or the
end of the leftmost-first match. It can quit immediately after it knows there
is a match. For example, given the regex `a+` and the haystack, `aaaaa`, the
search will quit after examing the first byte.
search will quit after examining the first byte.
In constrast, `find` must return both the start and end location of the
In contrast, `find` must return both the start and end location of the
leftmost-first match. It can use the DFA matcher for this, but must run it
forwards once to find the end of the match *and then run it backwards* to find
the start of the match. The two scans and the cost of finding the real end of
@ -198,7 +196,7 @@ a few examples of regexes that get literal prefixes detected:
Literals in anchored regexes can also be used for detecting non-matches very
quickly. For example, `^foo\w+` and `\w+foo$` may be able to detect a non-match
just by examing the first (or last) three bytes of the haystack.
just by examining the first (or last) three bytes of the haystack.
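To make the `is_match` versus `find` distinction above concrete, a small sketch using the `a+` example from that section:

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(r"a+").unwrap();
    let hay = "aaaaa";

    // is_match can stop as soon as any match is known to exist.
    assert!(re.is_match(hay));

    // find must report both endpoints of the leftmost-first match,
    // which for a greedy `a+` is the whole haystack here.
    let m = re.find(hay).unwrap();
    assert_eq!((m.start(), m.end()), (0, 5));
}
```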
## Unicode word boundaries may prevent the DFA from being used

22 third_party/rust/regex/README.md (vendored)

@ -7,11 +7,9 @@ linear time with respect to the size of the regular expression and search text.
Much of the syntax and implementation is inspired
by [RE2](https://github.com/google/re2).
[![Build status](https://travis-ci.com/rust-lang/regex.svg?branch=master)](https://travis-ci.com/rust-lang/regex)
[![Build status](https://ci.appveyor.com/api/projects/status/github/rust-lang/regex?svg=true)](https://ci.appveyor.com/project/rust-lang-libs/regex)
[![Coverage Status](https://coveralls.io/repos/github/rust-lang/regex/badge.svg?branch=master)](https://coveralls.io/github/rust-lang/regex?branch=master)
[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions)
[![](https://meritbadge.herokuapp.com/regex)](https://crates.io/crates/regex)
[![Rust](https://img.shields.io/badge/rust-1.28.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
[![Rust](https://img.shields.io/badge/rust-1.41.1%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
### Documentation
@ -29,13 +27,7 @@ Add this to your `Cargo.toml`:
```toml
[dependencies]
regex = "1"
```
and this to your crate root (if you're using Rust 2015):
```rust
extern crate regex;
regex = "1.5"
```
Here's a simple example that matches a date in YYYY-MM-DD format and prints the
@ -230,7 +222,7 @@ The full set of features one can disable are
### Minimum Rust version policy
This crate's minimum supported `rustc` version is `1.28.0`.
This crate's minimum supported `rustc` version is `1.41.1`.
The current **tentative** policy is that the minimum Rust version required
to use this crate can be increased in minor version updates. For example, if
@ -247,12 +239,12 @@ supported version of Rust.
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
http://www.apache.org/licenses/LICENSE-2.0)
https://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or
http://opensource.org/licenses/MIT)
https://opensource.org/licenses/MIT)
at your option.
The data in `regex-syntax/src/unicode_tables/` is licensed under the Unicode
License Agreement
([LICENSE-UNICODE](http://www.unicode.org/copyright.html#License)).
([LICENSE-UNICODE](https://www.unicode.org/copyright.html#License)).

34 third_party/rust/regex/UNICODE.md (vendored)

@ -1,7 +1,7 @@
# Unicode conformance
This document describes the regex crate's conformance to Unicode's
[UTS#18](http://unicode.org/reports/tr18/)
[UTS#18](https://unicode.org/reports/tr18/)
report, which lays out 3 levels of support: Basic, Extended and Tailored.
Full support for Level 1 ("Basic Unicode Support") is provided with two
@ -10,7 +10,7 @@ exceptions:
1. Line boundaries are not Unicode aware. Namely, only the `\n`
(`END OF LINE`) character is recognized as a line boundary.
2. The compatibility properties specified by
[RL1.2a](http://unicode.org/reports/tr18/#RL1.2a)
[RL1.2a](https://unicode.org/reports/tr18/#RL1.2a)
are ASCII-only definitions.
Little to no support is provided for either Level 2 or Level 3. For the most
@ -61,18 +61,18 @@ provide a convenient way to construct character classes of groups of code
points specified by Unicode. The regex crate does not provide exhaustive
support, but covers a useful subset. In particular:
* [General categories](http://unicode.org/reports/tr18/#General_Category_Property)
* [Scripts and Script Extensions](http://unicode.org/reports/tr18/#Script_Property)
* [Age](http://unicode.org/reports/tr18/#Age)
* [General categories](https://unicode.org/reports/tr18/#General_Category_Property)
* [Scripts and Script Extensions](https://unicode.org/reports/tr18/#Script_Property)
* [Age](https://unicode.org/reports/tr18/#Age)
* A smattering of boolean properties, including all of those specified by
[RL1.2](http://unicode.org/reports/tr18/#RL1.2) explicitly.
[RL1.2](https://unicode.org/reports/tr18/#RL1.2) explicitly.
In all cases, property name and value abbreviations are supported, and all
names/values are matched loosely without regard for case, whitespace or
underscores. Property name aliases can be found in Unicode's
[`PropertyAliases.txt`](http://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt)
[`PropertyAliases.txt`](https://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt)
file, while property value aliases can be found in Unicode's
[`PropertyValueAliases.txt`](http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt)
[`PropertyValueAliases.txt`](https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt)
file.
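A small sketch of that loose name/value matching (the pattern strings are illustrative, not taken from the vendored docs):

```rust
use regex::Regex;

fn main() {
    // Case is ignored in property names and values, and the long
    // form `script=` is interchangeable with the bare script name.
    let a = Regex::new(r"\p{Greek}").unwrap();
    let b = Regex::new(r"\p{script=greek}").unwrap();
    let c = Regex::new(r"\p{SC=GREEK}").unwrap();
    assert!(a.is_match("λ"));
    assert!(b.is_match("λ"));
    assert!(c.is_match("λ"));
}
```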
The syntax supported is also consistent with the UTS#18 recommendation:
@ -149,10 +149,10 @@ properties correspond to properties required by RL1.2):
## RL1.2a Compatibility Properties
[UTS#18 RL1.2a](http://unicode.org/reports/tr18/#RL1.2a)
[UTS#18 RL1.2a](https://unicode.org/reports/tr18/#RL1.2a)
The regex crate only provides ASCII definitions of the
[compatibility properties documented in UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties)
[compatibility properties documented in UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties)
(sans the `\X` class, for matching grapheme clusters, which isn't provided
at all). This is because it seems to be consistent with most other regular
expression engines, and in particular, because these are often referred to as
@ -165,7 +165,7 @@ Their traditional ASCII definition can be used by disabling Unicode. That is,
## RL1.3 Subtraction and Intersection
[UTS#18 RL1.3](http://unicode.org/reports/tr18/#Subtraction_and_Intersection)
[UTS#18 RL1.3](https://unicode.org/reports/tr18/#Subtraction_and_Intersection)
The regex crate provides full support for nested character classes, along with
union, intersection (`&&`), difference (`--`) and symmetric difference (`~~`)
@ -178,7 +178,7 @@ For example, to match all non-ASCII letters, you could use either
## RL1.4 Simple Word Boundaries
[UTS#18 RL1.4](http://unicode.org/reports/tr18/#Simple_Word_Boundaries)
[UTS#18 RL1.4](https://unicode.org/reports/tr18/#Simple_Word_Boundaries)
The regex crate provides basic Unicode aware word boundary assertions. A word
boundary assertion can be written as `\b`, or `\B` as its negation. A word
@ -196,9 +196,9 @@ the following classes:
* `\p{gc:Connector_Punctuation}`
In particular, this differs slightly from the
[prescription given in RL1.4](http://unicode.org/reports/tr18/#Simple_Word_Boundaries)
[prescription given in RL1.4](https://unicode.org/reports/tr18/#Simple_Word_Boundaries)
but is permissible according to
[UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties).
[UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties).
Namely, it is convenient and simpler to have `\w` and `\b` be in sync with
one another.
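A sketch of the Unicode-aware `\b` behaviour this section describes (the haystacks are illustrative):

```rust
use regex::Regex;

fn main() {
    // With Unicode enabled (the default), 'é' is a word character,
    // so \b recognises the boundary right after it.
    let re = Regex::new(r"\bcafé\b").unwrap();
    assert!(re.is_match("un café noir"));
    // No boundary between 'é' and 's', so this does not match.
    assert!(!re.is_match("cafés"));
}
```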
@ -211,7 +211,7 @@ boundaries is currently sub-optimal on non-ASCII text.
## RL1.5 Simple Loose Matches
[UTS#18 RL1.5](http://unicode.org/reports/tr18/#Simple_Loose_Matches)
[UTS#18 RL1.5](https://unicode.org/reports/tr18/#Simple_Loose_Matches)
The regex crate provides full support for case insensitive matching in
accordance with RL1.5. That is, it uses the "simple" case folding mapping. The
@ -226,7 +226,7 @@ then all characters classes are case folded as well.
## RL1.6 Line Boundaries
[UTS#18 RL1.6](http://unicode.org/reports/tr18/#Line_Boundaries)
[UTS#18 RL1.6](https://unicode.org/reports/tr18/#Line_Boundaries)
The regex crate only provides support for recognizing the `\n` (`END OF LINE`)
character as a line boundary. This choice was made mostly for implementation
@ -239,7 +239,7 @@ well, and in theory, this could be done efficiently.
## RL1.7 Code Points
[UTS#18 RL1.7](http://unicode.org/reports/tr18/#Supplementary_Characters)
[UTS#18 RL1.7](https://unicode.org/reports/tr18/#Supplementary_Characters)
The regex crate provides full support for Unicode code point matching. Namely,
the fundamental atom of any match is always a single code point.


@ -1,12 +1,10 @@
// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
// https://benchmarksgame-team.pages.debian.net/benchmarksgame/
//
// contributed by the Rust Project Developers
// contributed by TeXitoi
// contributed by BurntSushi
extern crate regex;
use std::io::{self, Read};
use std::sync::Arc;
use std::thread;


@ -1,5 +1,5 @@
// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
// https://benchmarksgame-team.pages.debian.net/benchmarksgame/
//
// contributed by the Rust Project Developers
// contributed by TeXitoi
@ -10,8 +10,6 @@
// replacing them with a single linear scan. i.e., it re-implements
// `replace_all`. As a result, this is around 25% faster. ---AG
extern crate regex;
use std::io::{self, Read};
use std::sync::Arc;
use std::thread;


@ -1,5 +1,3 @@
extern crate regex;
use std::io::{self, Read};
macro_rules! regex {


@ -1,12 +1,10 @@
// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
// https://benchmarksgame-team.pages.debian.net/benchmarksgame/
//
// contributed by the Rust Project Developers
// contributed by TeXitoi
// contributed by BurntSushi
extern crate regex;
use std::io::{self, Read};
macro_rules! regex {


@ -1,12 +1,10 @@
// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
// https://benchmarksgame-team.pages.debian.net/benchmarksgame/
//
// contributed by the Rust Project Developers
// contributed by TeXitoi
// contributed by BurntSushi
extern crate regex;
use std::io::{self, Read};
macro_rules! regex {


@ -1,12 +1,10 @@
// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
// https://benchmarksgame-team.pages.debian.net/benchmarksgame/
//
// contributed by the Rust Project Developers
// contributed by TeXitoi
// contributed by BurntSushi
extern crate regex;
use std::io::{self, Read};
use std::sync::Arc;
use std::thread;

14 third_party/rust/regex/src/backtrack.rs (vendored)

@ -16,10 +16,10 @@
// the bitset has to be zeroed on each execution, which becomes quite expensive
// on large bitsets.
use exec::ProgramCache;
use input::{Input, InputAt};
use prog::{InstPtr, Program};
use re_trait::Slot;
use crate::exec::ProgramCache;
use crate::input::{Input, InputAt};
use crate::prog::{InstPtr, Program};
use crate::re_trait::Slot;
type Bits = u32;
@ -115,8 +115,8 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
// Then we reset all existing allocated space to 0.
// Finally, we request more space if we need it.
//
// This is all a little circuitous, but doing this unsafely
// doesn't seem to have a measurable impact on performance.
// This is all a little circuitous, but doing this using unchecked
// operations doesn't seem to have a measurable impact on performance.
// (Probably because backtracking is limited to such small
// inputs/regexes in the first place.)
let visited_len =
@ -196,7 +196,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
}
fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool {
use prog::Inst::*;
use crate::prog::Inst::*;
loop {
// This loop is an optimization to avoid constantly pushing/popping
// from the stack. Namely, if we're pushing a job only to run it

100 third_party/rust/regex/src/cache.rs (vendored)

@ -1,100 +0,0 @@
// This module defines a common API for caching internal runtime state.
// The `thread_local` crate provides an extremely optimized version of this.
// However, if the perf-cache feature is disabled, then we drop the
// thread_local dependency and instead use a pretty naive caching mechanism
// with a mutex.
//
// Strictly speaking, the CachedGuard isn't necessary for the much more
// flexible thread_local API, but implementing thread_local's API doesn't
// seem possible in purely safe code.
pub use self::imp::{Cached, CachedGuard};
#[cfg(feature = "perf-cache")]
mod imp {
use thread_local::CachedThreadLocal;
#[derive(Debug)]
pub struct Cached<T: Send>(CachedThreadLocal<T>);
#[derive(Debug)]
pub struct CachedGuard<'a, T: 'a>(&'a T);
impl<T: Send> Cached<T> {
pub fn new() -> Cached<T> {
Cached(CachedThreadLocal::new())
}
pub fn get_or(&self, create: impl FnOnce() -> T) -> CachedGuard<T> {
CachedGuard(self.0.get_or(|| create()))
}
}
impl<'a, T: Send> CachedGuard<'a, T> {
pub fn value(&self) -> &T {
self.0
}
}
}
#[cfg(not(feature = "perf-cache"))]
mod imp {
use std::marker::PhantomData;
use std::panic::UnwindSafe;
use std::sync::Mutex;
#[derive(Debug)]
pub struct Cached<T: Send> {
stack: Mutex<Vec<T>>,
/// When perf-cache is enabled, the thread_local crate is used, and
/// its CachedThreadLocal impls Send, Sync and UnwindSafe, but NOT
/// RefUnwindSafe. However, a Mutex impls RefUnwindSafe. So in order
/// to keep the APIs consistent regardless of whether perf-cache is
/// enabled, we force this type to NOT impl RefUnwindSafe too.
///
/// Ideally, we should always impl RefUnwindSafe, but it seems a little
/// tricky to do that right now.
///
/// See also: https://github.com/rust-lang/regex/issues/576
_phantom: PhantomData<Box<dyn Send + Sync + UnwindSafe>>,
}
#[derive(Debug)]
pub struct CachedGuard<'a, T: 'a + Send> {
cache: &'a Cached<T>,
value: Option<T>,
}
impl<T: Send> Cached<T> {
pub fn new() -> Cached<T> {
Cached { stack: Mutex::new(vec![]), _phantom: PhantomData }
}
pub fn get_or(&self, create: impl FnOnce() -> T) -> CachedGuard<T> {
let mut stack = self.stack.lock().unwrap();
match stack.pop() {
None => CachedGuard { cache: self, value: Some(create()) },
Some(value) => CachedGuard { cache: self, value: Some(value) },
}
}
fn put(&self, value: T) {
let mut stack = self.stack.lock().unwrap();
stack.push(value);
}
}
impl<'a, T: Send> CachedGuard<'a, T> {
pub fn value(&self) -> &T {
self.value.as_ref().unwrap()
}
}
impl<'a, T: Send> Drop for CachedGuard<'a, T> {
fn drop(&mut self) {
if let Some(value) = self.value.take() {
self.cache.put(value);
}
}
}
}

318 third_party/rust/regex/src/compile.rs (vendored)

@ -1,20 +1,22 @@
use std::collections::HashMap;
use std::fmt;
use std::iter;
use std::result;
use std::sync::Arc;
use syntax::hir::{self, Hir};
use syntax::is_word_byte;
use syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
use regex_syntax::hir::{self, Hir};
use regex_syntax::is_word_byte;
use regex_syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
use prog::{
use crate::prog::{
EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges,
InstSave, InstSplit, Program,
};
use Error;
use crate::Error;
type Result = result::Result<Patch, Error>;
type ResultOrEmpty = result::Result<Option<Patch>, Error>;
#[derive(Debug)]
struct Patch {
@ -24,6 +26,9 @@ struct Patch {
/// A compiler translates a regular expression AST to a sequence of
/// instructions. The sequence of instructions represents an NFA.
// `Compiler` is only public via the `internal` module, so avoid deriving
// `Debug`.
#[allow(missing_debug_implementations)]
pub struct Compiler {
insts: Vec<MaybeInst>,
compiled: Program,
@ -33,6 +38,7 @@ pub struct Compiler {
suffix_cache: SuffixCache,
utf8_seqs: Option<Utf8Sequences>,
byte_classes: ByteClassSet,
extra_inst_bytes: usize,
}
impl Compiler {
@ -49,6 +55,7 @@ impl Compiler {
suffix_cache: SuffixCache::new(1000),
utf8_seqs: Some(Utf8Sequences::new('\x00', '\x00')),
byte_classes: ByteClassSet::new(),
extra_inst_bytes: 0,
}
}
@ -132,7 +139,7 @@ impl Compiler {
self.compiled.start = dotstar_patch.entry;
}
self.compiled.captures = vec![None];
let patch = self.c_capture(0, expr)?;
let patch = self.c_capture(0, expr)?.unwrap_or(self.next_inst());
if self.compiled.needs_dotstar() {
self.fill(dotstar_patch.hole, patch.entry);
} else {
@ -167,14 +174,16 @@ impl Compiler {
for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
let Patch { hole, entry } = self.c_capture(0, expr)?;
let Patch { hole, entry } =
self.c_capture(0, expr)?.unwrap_or(self.next_inst());
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
self.push_compiled(Inst::Match(i));
prev_hole = self.fill_split(split, Some(entry), None);
}
let i = exprs.len() - 1;
let Patch { hole, entry } = self.c_capture(0, &exprs[i])?;
let Patch { hole, entry } =
self.c_capture(0, &exprs[i])?.unwrap_or(self.next_inst());
self.fill(prev_hole, entry);
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
@ -219,7 +228,7 @@ impl Compiler {
/// hole
/// ```
///
/// To compile two expressions, e1 and e2, concatinated together we
/// To compile two expressions, e1 and e2, concatenated together we
/// would do:
///
/// ```ignore
@ -242,13 +251,16 @@ impl Compiler {
/// method you will see that it does exactly this, though it handles
/// a list of expressions rather than just the two that we use for
/// an example.
fn c(&mut self, expr: &Hir) -> Result {
use prog;
use syntax::hir::HirKind::*;
///
/// Ok(None) is returned when an expression is compiled to no
/// instruction, and so no patch.entry value makes sense.
fn c(&mut self, expr: &Hir) -> ResultOrEmpty {
use crate::prog;
use regex_syntax::hir::HirKind::*;
self.check_size()?;
match *expr.kind() {
Empty => Ok(Patch { hole: Hole::None, entry: self.insts.len() }),
Empty => Ok(None),
Literal(hir::Literal::Unicode(c)) => self.c_char(c),
Literal(hir::Literal::Byte(b)) => {
assert!(self.compiled.uses_bytes());
@ -306,6 +318,13 @@ impl Compiler {
}
self.compiled.has_unicode_word_boundary = true;
self.byte_classes.set_word_boundary();
// We also make sure that all ASCII bytes are in a different
// class from non-ASCII bytes. Otherwise, it's possible for
// ASCII bytes to get lumped into the same class as non-ASCII
// bytes. This in turn may cause the lazy DFA to falsely start
// when it sees an ASCII byte that maps to a byte class with
// non-ASCII bytes. This ensures that never happens.
self.byte_classes.set_range(0, 0x7F);
self.c_empty_look(prog::EmptyLook::WordBoundary)
}
WordBoundary(hir::WordBoundary::UnicodeNegate) => {
@ -318,6 +337,8 @@ impl Compiler {
}
self.compiled.has_unicode_word_boundary = true;
self.byte_classes.set_word_boundary();
// See comments above for why we set the ASCII range here.
self.byte_classes.set_range(0, 0x7F);
self.c_empty_look(prog::EmptyLook::NotWordBoundary)
}
WordBoundary(hir::WordBoundary::Ascii) => {
@ -357,7 +378,7 @@ impl Compiler {
}
}
fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> Result {
fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty {
if self.num_exprs > 1 || self.compiled.is_dfa {
// Don't ever compile Save instructions for regex sets because
// they are never used. They are also never used in DFA programs
@ -366,11 +387,11 @@ impl Compiler {
} else {
let entry = self.insts.len();
let hole = self.push_hole(InstHole::Save { slot: first_slot });
let patch = self.c(expr)?;
let patch = self.c(expr)?.unwrap_or(self.next_inst());
self.fill(hole, patch.entry);
self.fill_to_next(patch.hole);
let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 });
Ok(Patch { hole: hole, entry: entry })
Ok(Some(Patch { hole: hole, entry: entry }))
}
}
@ -381,40 +402,62 @@ impl Compiler {
greedy: false,
hir: Box::new(Hir::any(true)),
}))?
.unwrap()
} else {
self.c(&Hir::repetition(hir::Repetition {
kind: hir::RepetitionKind::ZeroOrMore,
greedy: false,
hir: Box::new(Hir::any(false)),
}))?
.unwrap()
})
}
fn c_char(&mut self, c: char) -> Result {
self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
fn c_char(&mut self, c: char) -> ResultOrEmpty {
if self.compiled.uses_bytes() {
if c.is_ascii() {
let b = c as u8;
let hole =
self.push_hole(InstHole::Bytes { start: b, end: b });
self.byte_classes.set_range(b, b);
Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
} else {
self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
}
} else {
let hole = self.push_hole(InstHole::Char { c: c });
Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
}
}
fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> Result {
fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
use std::mem::size_of;
assert!(!ranges.is_empty());
if self.compiled.uses_bytes() {
CompileClass { c: self, ranges: ranges }.compile()
Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?))
} else {
let ranges: Vec<(char, char)> =
ranges.iter().map(|r| (r.start(), r.end())).collect();
let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 {
self.push_hole(InstHole::Char { c: ranges[0].0 })
} else {
self.extra_inst_bytes +=
ranges.len() * (size_of::<char>() * 2);
self.push_hole(InstHole::Ranges { ranges: ranges })
};
Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
}
}
fn c_byte(&mut self, b: u8) -> Result {
fn c_byte(&mut self, b: u8) -> ResultOrEmpty {
self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)])
}
fn c_class_bytes(&mut self, ranges: &[hir::ClassBytesRange]) -> Result {
fn c_class_bytes(
&mut self,
ranges: &[hir::ClassBytesRange],
) -> ResultOrEmpty {
debug_assert!(!ranges.is_empty());
let first_split_entry = self.insts.len();
@ -438,35 +481,39 @@ impl Compiler {
self.push_hole(InstHole::Bytes { start: r.start(), end: r.end() }),
);
self.fill(prev_hole, next);
Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
}
fn c_empty_look(&mut self, look: EmptyLook) -> Result {
fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty {
let hole = self.push_hole(InstHole::EmptyLook { look: look });
Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
}
fn c_concat<'a, I>(&mut self, exprs: I) -> Result
fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty
where
I: IntoIterator<Item = &'a Hir>,
{
let mut exprs = exprs.into_iter();
let first = match exprs.next() {
Some(expr) => expr,
None => {
return Ok(Patch { hole: Hole::None, entry: self.insts.len() })
let Patch { mut hole, entry } = loop {
match exprs.next() {
None => return Ok(None),
Some(e) => {
if let Some(p) = self.c(e)? {
break p;
}
}
}
};
let Patch { mut hole, entry } = self.c(first)?;
for e in exprs {
let p = self.c(e)?;
self.fill(hole, p.entry);
hole = p.hole;
if let Some(p) = self.c(e)? {
self.fill(hole, p.entry);
hole = p.hole;
}
}
Ok(Patch { hole: hole, entry: entry })
Ok(Some(Patch { hole: hole, entry: entry }))
}
fn c_alternate(&mut self, exprs: &[Hir]) -> Result {
fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty {
debug_assert!(
exprs.len() >= 2,
"alternates must have at least 2 exprs"
@ -479,44 +526,44 @@ impl Compiler {
// patched to point to the same location.
let mut holes = vec![];
let mut prev_hole = Hole::None;
// true indicates that the hole is a split where we want to fill
// the second branch.
let mut prev_hole = (Hole::None, false);
for e in &exprs[0..exprs.len() - 1] {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
let prev_entry = self.insts.len();
let Patch { hole, entry } = self.c(e)?;
if prev_entry == self.insts.len() {
// TODO(burntsushi): It is kind of silly that we don't support
// empty-subexpressions in alternates, but it is supremely
// awkward to support them in the existing compiler
// infrastructure. This entire compiler needs to be thrown out
// anyway, so don't feel too bad.
return Err(Error::Syntax(
"alternations cannot currently contain \
empty sub-expressions"
.to_string(),
));
if prev_hole.1 {
let next = self.insts.len();
self.fill_split(prev_hole.0, None, Some(next));
} else {
self.fill_to_next(prev_hole.0);
}
let split = self.push_split_hole();
if let Some(Patch { hole, entry }) = self.c(e)? {
holes.push(hole);
prev_hole = (self.fill_split(split, Some(entry), None), false);
} else {
let (split1, split2) = split.dup_one();
holes.push(split1);
prev_hole = (split2, true);
}
}
if let Some(Patch { hole, entry }) = self.c(&exprs[exprs.len() - 1])? {
holes.push(hole);
prev_hole = self.fill_split(split, Some(entry), None);
if prev_hole.1 {
self.fill_split(prev_hole.0, None, Some(entry));
} else {
self.fill(prev_hole.0, entry);
}
} else {
// We ignore prev_hole.1. When it's true, it means we have two
// empty branches both pushing prev_hole.0 into holes, so both
// branches will go to the same place anyway.
holes.push(prev_hole.0);
}
let prev_entry = self.insts.len();
let Patch { hole, entry } = self.c(&exprs[exprs.len() - 1])?;
if prev_entry == self.insts.len() {
// TODO(burntsushi): See TODO above.
return Err(Error::Syntax(
"alternations cannot currently contain \
empty sub-expressions"
.to_string(),
));
}
holes.push(hole);
self.fill(prev_hole, entry);
Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
}
fn c_repeat(&mut self, rep: &hir::Repetition) -> Result {
use syntax::hir::RepetitionKind::*;
fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty {
use regex_syntax::hir::RepetitionKind::*;
match rep.kind {
ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy),
@ -533,24 +580,37 @@ impl Compiler {
}
}
fn c_repeat_zero_or_one(&mut self, expr: &Hir, greedy: bool) -> Result {
fn c_repeat_zero_or_one(
&mut self,
expr: &Hir,
greedy: bool,
) -> ResultOrEmpty {
let split_entry = self.insts.len();
let split = self.push_split_hole();
let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
Some(p) => p,
None => return self.pop_split_hole(),
};
let split_hole = if greedy {
self.fill_split(split, Some(entry_rep), None)
} else {
self.fill_split(split, None, Some(entry_rep))
};
let holes = vec![hole_rep, split_hole];
Ok(Patch { hole: Hole::Many(holes), entry: split_entry })
Ok(Some(Patch { hole: Hole::Many(holes), entry: split_entry }))
}
fn c_repeat_zero_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
fn c_repeat_zero_or_more(
&mut self,
expr: &Hir,
greedy: bool,
) -> ResultOrEmpty {
let split_entry = self.insts.len();
let split = self.push_split_hole();
let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
Some(p) => p,
None => return self.pop_split_hole(),
};
self.fill(hole_rep, split_entry);
let split_hole = if greedy {
@ -558,11 +618,18 @@ impl Compiler {
} else {
self.fill_split(split, None, Some(entry_rep))
};
Ok(Patch { hole: split_hole, entry: split_entry })
Ok(Some(Patch { hole: split_hole, entry: split_entry }))
}
fn c_repeat_one_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
fn c_repeat_one_or_more(
&mut self,
expr: &Hir,
greedy: bool,
) -> ResultOrEmpty {
let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
Some(p) => p,
None => return Ok(None),
};
self.fill_to_next(hole_rep);
let split = self.push_split_hole();
@ -571,7 +638,7 @@ impl Compiler {
} else {
self.fill_split(split, None, Some(entry_rep))
};
Ok(Patch { hole: split_hole, entry: entry_rep })
Ok(Some(Patch { hole: split_hole, entry: entry_rep }))
}
fn c_repeat_range_min_or_more(
@ -579,12 +646,20 @@ impl Compiler {
expr: &Hir,
greedy: bool,
min: u32,
) -> Result {
) -> ResultOrEmpty {
let min = u32_to_usize(min);
let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
let patch_rep = self.c_repeat_zero_or_more(expr, greedy)?;
self.fill(patch_concat.hole, patch_rep.entry);
Ok(Patch { hole: patch_rep.hole, entry: patch_concat.entry })
// Using next_inst() is ok, because we can't return it (concat would
// have to return Some(_) while c_repeat_range_min_or_more returns
// None).
let patch_concat = self
.c_concat(iter::repeat(expr).take(min))?
.unwrap_or(self.next_inst());
if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? {
self.fill(patch_concat.hole, patch_rep.entry);
Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry }))
} else {
Ok(None)
}
}
fn c_repeat_range(
@ -593,13 +668,17 @@ impl Compiler {
greedy: bool,
min: u32,
max: u32,
) -> Result {
) -> ResultOrEmpty {
let (min, max) = (u32_to_usize(min), u32_to_usize(max));
debug_assert!(min <= max);
let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
let initial_entry = patch_concat.entry;
if min == max {
return Ok(patch_concat);
}
// Same reasoning as in c_repeat_range_min_or_more (we know that min <
// max at this point).
let patch_concat = patch_concat.unwrap_or(self.next_inst());
let initial_entry = patch_concat.entry;
// It is much simpler to compile, e.g., `a{2,5}` as:
//
// aaa?a?a?
@ -624,7 +703,10 @@ impl Compiler {
for _ in min..max {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
let Patch { hole, entry } = self.c(expr)?;
let Patch { hole, entry } = match self.c(expr)? {
Some(p) => p,
None => return self.pop_split_hole(),
};
prev_hole = hole;
if greedy {
holes.push(self.fill_split(split, Some(entry), None));
@ -633,7 +715,14 @@ impl Compiler {
}
}
holes.push(prev_hole);
Ok(Patch { hole: Hole::Many(holes), entry: initial_entry })
Ok(Some(Patch { hole: Hole::Many(holes), entry: initial_entry }))
}
/// Can be used as a default value for the c_* functions when the call to
/// c_function is followed by inserting at least one instruction that is
/// always executed after the ones written by the c* function.
fn next_inst(&self) -> Patch {
Patch { hole: Hole::None, entry: self.insts.len() }
}
fn fill(&mut self, hole: Hole, goto: InstPtr) {
@ -713,10 +802,17 @@ impl Compiler {
Hole::One(hole)
}
fn pop_split_hole(&mut self) -> ResultOrEmpty {
self.insts.pop();
Ok(None)
}
fn check_size(&self) -> result::Result<(), Error> {
use std::mem::size_of;
if self.insts.len() * size_of::<Inst>() > self.size_limit {
let size =
self.extra_inst_bytes + (self.insts.len() * size_of::<Inst>());
if size > self.size_limit {
Err(Error::CompiledTooBig(self.size_limit))
} else {
Ok(())
@ -731,6 +827,17 @@ enum Hole {
Many(Vec<Hole>),
}
impl Hole {
fn dup_one(self) -> (Self, Self) {
match self {
Hole::One(pc) => (Hole::One(pc), Hole::One(pc)),
Hole::None | Hole::Many(_) => {
unreachable!("must be called on single hole")
}
}
}
}
#[derive(Clone, Debug)]
enum MaybeInst {
Compiled(Inst),
@ -742,13 +849,22 @@ enum MaybeInst {
impl MaybeInst {
fn fill(&mut self, goto: InstPtr) {
let filled = match *self {
MaybeInst::Uncompiled(ref inst) => inst.fill(goto),
let maybeinst = match *self {
MaybeInst::Split => MaybeInst::Split1(goto),
MaybeInst::Uncompiled(ref inst) => {
MaybeInst::Compiled(inst.fill(goto))
}
MaybeInst::Split1(goto1) => {
Inst::Split(InstSplit { goto1: goto1, goto2: goto })
MaybeInst::Compiled(Inst::Split(InstSplit {
goto1: goto1,
goto2: goto,
}))
}
MaybeInst::Split2(goto2) => {
Inst::Split(InstSplit { goto1: goto, goto2: goto2 })
MaybeInst::Compiled(Inst::Split(InstSplit {
goto1: goto,
goto2: goto2,
}))
}
_ => unreachable!(
"not all instructions were compiled! \
@ -756,7 +872,7 @@ impl MaybeInst {
self
),
};
*self = MaybeInst::Compiled(filled);
*self = maybeinst;
}
fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) {
@ -828,9 +944,10 @@ impl InstHole {
Inst::EmptyLook(InstEmptyLook { goto: goto, look: look })
}
InstHole::Char { c } => Inst::Char(InstChar { goto: goto, c: c }),
InstHole::Ranges { ref ranges } => {
Inst::Ranges(InstRanges { goto: goto, ranges: ranges.clone() })
}
InstHole::Ranges { ref ranges } => Inst::Ranges(InstRanges {
goto: goto,
ranges: ranges.clone().into_boxed_slice(),
}),
InstHole::Bytes { start, end } => {
Inst::Bytes(InstBytes { goto: goto, start: start, end: end })
}
@ -956,6 +1073,7 @@ impl<'a, 'b> CompileClass<'a, 'b> {
/// This uses similar idea to [`SparseSet`](../sparse/struct.SparseSet.html),
/// except it uses hashes as original indices and then compares full keys for
/// validation against `dense` array.
#[derive(Debug)]
struct SuffixCache {
sparse: Box<[usize]>,
dense: Vec<SuffixCacheEntry>,
@ -1064,6 +1182,12 @@ impl ByteClassSet {
}
}
impl fmt::Debug for ByteClassSet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_tuple("ByteClassSet").field(&&self.0[..]).finish()
}
}
fn u32_to_usize(n: u32) -> usize {
// In case usize is less than 32 bits, we need to guard against overflow.
// On most platforms this compiles to nothing.
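
The compile.rs hunks above change the `c_*` helpers to return `ResultOrEmpty` (`Ok(None)` when a sub-expression compiles to no instructions), which is what lets alternations contain empty branches instead of failing with the old "alternations cannot currently contain empty sub-expressions" syntax error. A minimal usage sketch of that behavior, assuming the upgraded `regex` crate is available as a dependency:

```rust
use regex::Regex;

fn main() {
    // The 1.3.x compiler rejected this pattern at compile time; with the
    // Option<Patch> scheme the empty branch simply compiles to no
    // instructions.
    let re = Regex::new("abc|").unwrap();
    assert!(re.is_match(""));
    // Leftmost-first semantics: at offset 0 the empty branch wins.
    let m = re.find("xabc").unwrap();
    assert_eq!((m.start(), m.end()), (0, 0));
    println!("empty alternation branch compiled and matched");
}
```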

59
third_party/rust/regex/src/dfa.rs vendored

@ -42,9 +42,9 @@ use std::iter::repeat;
use std::mem;
use std::sync::Arc;
use exec::ProgramCache;
use prog::{Inst, Program};
use sparse::SparseSet;
use crate::exec::ProgramCache;
use crate::prog::{Inst, Program};
use crate::sparse::SparseSet;
/// Return true if and only if the given program can be executed by a DFA.
///
@ -55,7 +55,7 @@ use sparse::SparseSet;
/// This function will also return false if the given program has any Unicode
/// instructions (Char or Ranges) since the DFA operates on bytes only.
pub fn can_exec(insts: &Program) -> bool {
use prog::Inst::*;
use crate::prog::Inst::*;
// If for some reason we manage to allocate a regex program with more
// than i32::MAX instructions, then we can't execute the DFA because we
// use 32 bit instruction pointer deltas for memory savings.
@ -306,7 +306,7 @@ impl State {
StateFlags(self.data[0])
}
fn inst_ptrs(&self) -> InstPtrs {
fn inst_ptrs(&self) -> InstPtrs<'_> {
InstPtrs { base: 0, data: &self.data[1..] }
}
}
@ -679,7 +679,7 @@ impl<'a> Fsm<'a> {
}
} else if next_si & STATE_START > 0 {
// A start state isn't in the common case because we may
// what to do quick prefix scanning. If the program doesn't
// want to do quick prefix scanning. If the program doesn't
// have a detected prefix, then start states are actually
// considered common and this case is never reached.
debug_assert!(self.has_prefix());
@ -725,7 +725,7 @@ impl<'a> Fsm<'a> {
}
}
// Run the DFA once more on the special EOF senitnel value.
// Run the DFA once more on the special EOF sentinel value.
// We don't care about the special bits in the state pointer any more,
// so get rid of them.
prev_si &= STATE_MAX;
@ -830,7 +830,7 @@ impl<'a> Fsm<'a> {
}
}
// Run the DFA once more on the special EOF senitnel value.
// Run the DFA once more on the special EOF sentinel value.
prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) {
None => return Result::Quit,
Some(STATE_DEAD) => return result.set_non_match(0),
@ -848,7 +848,7 @@ impl<'a> Fsm<'a> {
/// next_si transitions to the next state, where the transition input
/// corresponds to text[i].
///
/// This elides bounds checks, and is therefore unsafe.
/// This elides bounds checks, and is therefore not safe.
#[cfg_attr(feature = "perf-inline", inline(always))]
unsafe fn next_si(&self, si: StatePtr, text: &[u8], i: usize) -> StatePtr {
// What is the argument for safety here?
@ -894,7 +894,7 @@ impl<'a> Fsm<'a> {
mut si: StatePtr,
b: Byte,
) -> Option<StatePtr> {
use prog::Inst::*;
use crate::prog::Inst::*;
// Initialize a queue with the current DFA state's NFA states.
qcur.clear();
@ -913,8 +913,8 @@ impl<'a> Fsm<'a> {
if self.state(si).flags().has_empty() {
// Compute the flags immediately preceding the current byte.
// This means we only care about the "end" or "end line" flags.
// (The "start" flags are computed immediately proceding the
// current byte and is handled below.)
// (The "start" flags are computed immediately following the
// current byte and are handled below.)
let mut flags = EmptyFlags::default();
if b.is_eof() {
flags.end = true;
@ -1048,7 +1048,7 @@ impl<'a> Fsm<'a> {
///
/// If matching starts after the beginning of the input, then only start
/// line should be set if the preceding byte is `\n`. End line should never
/// be set in this case. (Even if the proceding byte is a `\n`, it will
/// be set in this case. (Even if the following byte is a `\n`, it will
/// be handled in a subsequent DFA state.)
fn follow_epsilons(
&mut self,
@ -1056,8 +1056,8 @@ impl<'a> Fsm<'a> {
q: &mut SparseSet,
flags: EmptyFlags,
) {
use prog::EmptyLook::*;
use prog::Inst::*;
use crate::prog::EmptyLook::*;
use crate::prog::Inst::*;
// We need to traverse the NFA to follow epsilon transitions, so avoid
// recursion with an explicit stack.
@ -1190,7 +1190,7 @@ impl<'a> Fsm<'a> {
q: &SparseSet,
state_flags: &mut StateFlags,
) -> Option<State> {
use prog::Inst::*;
use crate::prog::Inst::*;
// We need to build up enough information to recognize pre-built states
// in the DFA. Generally speaking, this includes every instruction
@ -1688,7 +1688,7 @@ impl Transitions {
self.num_byte_classes * mem::size_of::<StatePtr>()
}
/// Like `next`, but uses unchecked access and is therefore unsafe.
/// Like `next`, but uses unchecked access and is therefore not safe.
unsafe fn next_unchecked(&self, si: StatePtr, cls: usize) -> StatePtr {
debug_assert!((si as usize) < self.table.len());
debug_assert!(cls < self.num_byte_classes);
@ -1754,7 +1754,7 @@ impl Byte {
}
impl fmt::Debug for State {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let ips: Vec<usize> = self.inst_ptrs().collect();
f.debug_struct("State")
.field("flags", &self.flags())
@ -1764,7 +1764,7 @@ impl fmt::Debug for State {
}
impl fmt::Debug for Transitions {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut fmtd = f.debug_map();
for si in 0..self.num_states() {
let s = si * self.num_byte_classes;
@ -1778,7 +1778,7 @@ impl fmt::Debug for Transitions {
struct TransitionsRow<'a>(&'a [StatePtr]);
impl<'a> fmt::Debug for TransitionsRow<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut fmtd = f.debug_map();
for (b, si) in self.0.iter().enumerate() {
match *si {
@ -1796,7 +1796,7 @@ impl<'a> fmt::Debug for TransitionsRow<'a> {
}
impl fmt::Debug for StateFlags {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("StateFlags")
.field("is_match", &self.is_match())
.field("is_word", &self.is_word())
@ -1889,18 +1889,27 @@ fn read_varu32(data: &[u8]) -> (u32, usize) {
#[cfg(test)]
mod tests {
extern crate rand;
use super::{
push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32,
State, StateFlags,
};
use quickcheck::{quickcheck, QuickCheck, StdGen};
use quickcheck::{quickcheck, Gen, QuickCheck};
use std::sync::Arc;
#[test]
fn prop_state_encode_decode() {
fn p(ips: Vec<u32>, flags: u8) -> bool {
fn p(mut ips: Vec<u32>, flags: u8) -> bool {
// It looks like our encoding scheme can't handle instruction
// pointers at or above 2**31. We should fix that, but it seems
// unlikely to occur in real code due to the amount of memory
// required for such a state machine. So for now, we just clamp
// our test data.
for ip in &mut ips {
if *ip >= 1 << 31 {
*ip = (1 << 31) - 1;
}
}
let mut data = vec![flags];
let mut prev = 0;
for &ip in ips.iter() {
@ -1914,7 +1923,7 @@ mod tests {
expected == got && state.flags() == StateFlags(flags)
}
QuickCheck::new()
.gen(StdGen::new(self::rand::thread_rng(), 10_000))
.gen(Gen::new(10_000))
.quickcheck(p as fn(Vec<u32>, u8) -> bool);
}
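
The dfa.rs test change above clamps instruction pointers below 2^31 before round-tripping them through the state encoding (`write_varu32`/`read_varu32`). Purely as an illustration, here is a LEB128-style varint in the same spirit; it is a sketch and not necessarily byte-for-byte identical to the crate's encoding:

```rust
// Encode a u32 as 7-bit groups, low bits first, with a continuation bit.
fn write_varu32(data: &mut Vec<u8>, mut n: u32) {
    while n >= 0b1000_0000 {
        data.push((n as u8) | 0b1000_0000); // low 7 bits, continuation set
        n >>= 7;
    }
    data.push(n as u8); // final byte, continuation clear
}

// Decode a u32 and return it along with the number of bytes consumed.
fn read_varu32(data: &[u8]) -> (u32, usize) {
    let (mut n, mut shift, mut i) = (0u32, 0u32, 0usize);
    while data[i] & 0b1000_0000 != 0 {
        n |= ((data[i] & 0b0111_1111) as u32) << shift;
        shift += 7;
        i += 1;
    }
    (n | ((data[i] as u32) << shift), i + 1)
}

fn main() {
    let mut buf = vec![];
    // The revised quickcheck test clamps instruction pointers to at most
    // 2^31 - 1, so round-trip the largest value it allows.
    let ip: u32 = (1 << 31) - 1;
    write_varu32(&mut buf, ip);
    assert_eq!(read_varu32(&buf), (ip, buf.len()));
}
```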

4
third_party/rust/regex/src/error.rs vendored

@ -31,7 +31,7 @@ impl ::std::error::Error for Error {
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Error::Syntax(ref err) => err.fmt(f),
Error::CompiledTooBig(limit) => write!(
@ -49,7 +49,7 @@ impl fmt::Display for Error {
// but the `Syntax` variant is already storing a `String` anyway, so we might
// as well format it nicely.
impl fmt::Debug for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Error::Syntax(ref err) => {
let hr: String = repeat('~').take(79).collect();

103
third_party/rust/regex/src/exec.rs vendored

@ -1,40 +1,49 @@
use std::cell::RefCell;
use std::collections::HashMap;
use std::panic::AssertUnwindSafe;
use std::sync::Arc;
#[cfg(feature = "perf-literal")]
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use syntax::hir::literal::Literals;
use syntax::hir::Hir;
use syntax::ParserBuilder;
use regex_syntax::hir::literal::Literals;
use regex_syntax::hir::Hir;
use regex_syntax::ParserBuilder;
use backtrack;
use cache::{Cached, CachedGuard};
use compile::Compiler;
use crate::backtrack;
use crate::compile::Compiler;
#[cfg(feature = "perf-dfa")]
use dfa;
use error::Error;
use input::{ByteInput, CharInput};
use literal::LiteralSearcher;
use pikevm;
use prog::Program;
use re_builder::RegexOptions;
use re_bytes;
use re_set;
use re_trait::{Locations, RegularExpression, Slot};
use re_unicode;
use utf8::next_utf8;
use crate::dfa;
use crate::error::Error;
use crate::input::{ByteInput, CharInput};
use crate::literal::LiteralSearcher;
use crate::pikevm;
use crate::pool::{Pool, PoolGuard};
use crate::prog::Program;
use crate::re_builder::RegexOptions;
use crate::re_bytes;
use crate::re_set;
use crate::re_trait::{Locations, RegularExpression, Slot};
use crate::re_unicode;
use crate::utf8::next_utf8;
/// `Exec` manages the execution of a regular expression.
///
/// In particular, this manages the various compiled forms of a single regular
/// expression and the choice of which matching engine to use to execute a
/// regular expression.
#[derive(Debug)]
pub struct Exec {
/// All read only state.
ro: Arc<ExecReadOnly>,
/// Caches for the various matching engines.
cache: Cached<ProgramCache>,
/// A pool of reusable values for the various matching engines.
///
/// Note that boxing this value is not strictly necessary, but it is an
/// easy way to ensure that T does not bloat the stack sized used by a pool
/// in the case where T is big. And this turns out to be the case at the
/// time of writing for regex's use of this pool. At the time of writing,
/// the size of a Regex on the stack is 856 bytes. Boxing this value
/// reduces that size to 16 bytes.
pool: Box<Pool<ProgramCache>>,
}
/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
@ -45,10 +54,11 @@ pub struct ExecNoSync<'c> {
/// All read only state.
ro: &'c Arc<ExecReadOnly>,
/// Caches for the various matching engines.
cache: CachedGuard<'c, ProgramCache>,
cache: PoolGuard<'c, ProgramCache>,
}
/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
#[derive(Debug)]
pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>);
/// `ExecReadOnly` comprises all read only state for a regex. Namely, all such
@ -97,6 +107,9 @@ struct ExecReadOnly {
/// Facilitates the construction of an executor by exposing various knobs
/// to control how a regex is executed and what kinds of resources it's
/// permitted to use.
// `ExecBuilder` is only public via the `internal` module, so avoid deriving
// `Debug`.
#[allow(missing_debug_implementations)]
pub struct ExecBuilder {
options: RegexOptions,
match_type: Option<MatchType>,
@ -127,7 +140,7 @@ impl ExecBuilder {
///
/// Note that when compiling 2 or more regular expressions, capture groups
/// are completely unsupported. (This means both `find` and `captures`
/// wont work.)
/// won't work.)
pub fn new_many<I, S>(res: I) -> Self
where
S: AsRef<str>,
@ -297,7 +310,8 @@ impl ExecBuilder {
ac: None,
match_type: MatchType::Nothing,
});
return Ok(Exec { ro: ro, cache: Cached::new() });
let pool = ExecReadOnly::new_pool(&ro);
return Ok(Exec { ro: ro, pool });
}
let parsed = self.parse()?;
let mut nfa = Compiler::new()
@ -337,7 +351,8 @@ impl ExecBuilder {
ro.match_type = ro.choose_match_type(self.match_type);
let ro = Arc::new(ro);
Ok(Exec { ro: ro, cache: Cached::new() })
let pool = ExecReadOnly::new_pool(&ro);
Ok(Exec { ro, pool })
}
#[cfg(feature = "perf-literal")]
@ -358,9 +373,6 @@ impl ExecBuilder {
AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.auto_configure(&lits)
// We always want this to reduce size, regardless
// of what auto-configure does.
.byte_classes(true)
.build_with_size::<u32, _, _>(&lits)
// This should never happen because we'd long exceed the
// compilation limit for regexes first.
@ -724,7 +736,7 @@ impl<'c> ExecNoSync<'c> {
text: &[u8],
start: usize,
) -> dfa::Result<(usize, usize)> {
use dfa::Result::*;
use crate::dfa::Result::*;
let end = match dfa::Fsm::forward(
&self.ro.dfa,
self.cache.value(),
@ -764,7 +776,7 @@ impl<'c> ExecNoSync<'c> {
text: &[u8],
start: usize,
) -> dfa::Result<(usize, usize)> {
use dfa::Result::*;
use crate::dfa::Result::*;
match dfa::Fsm::reverse(
&self.ro.dfa_reverse,
self.cache.value(),
@ -820,7 +832,7 @@ impl<'c> ExecNoSync<'c> {
text: &[u8],
original_start: usize,
) -> Option<dfa::Result<(usize, usize)>> {
use dfa::Result::*;
use crate::dfa::Result::*;
let lcs = self.ro.suffixes.lcs();
debug_assert!(lcs.len() >= 1);
@ -865,7 +877,7 @@ impl<'c> ExecNoSync<'c> {
text: &[u8],
start: usize,
) -> dfa::Result<(usize, usize)> {
use dfa::Result::*;
use crate::dfa::Result::*;
let match_start = match self.exec_dfa_reverse_suffix(text, start) {
None => return self.find_dfa_forward(text, start),
@ -1248,17 +1260,16 @@ impl<'c> ExecNoSyncStr<'c> {
impl Exec {
/// Get a searcher that isn't Sync.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub fn searcher(&self) -> ExecNoSync {
let create = || RefCell::new(ProgramCacheInner::new(&self.ro));
pub fn searcher(&self) -> ExecNoSync<'_> {
ExecNoSync {
ro: &self.ro, // a clone is too expensive here! (and not needed)
cache: self.cache.get_or(create),
cache: self.pool.get(),
}
}
/// Get a searcher that isn't Sync and can match on &str.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub fn searcher_str(&self) -> ExecNoSyncStr {
pub fn searcher_str(&self) -> ExecNoSyncStr<'_> {
ExecNoSyncStr(self.searcher())
}
@ -1304,7 +1315,8 @@ impl Exec {
impl Clone for Exec {
fn clone(&self) -> Exec {
Exec { ro: self.ro.clone(), cache: Cached::new() }
let pool = ExecReadOnly::new_pool(&self.ro);
Exec { ro: self.ro.clone(), pool }
}
}
@ -1437,6 +1449,13 @@ impl ExecReadOnly {
let lcs_len = self.suffixes.lcs().char_len();
lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len()
}
fn new_pool(ro: &Arc<ExecReadOnly>) -> Box<Pool<ProgramCache>> {
let ro = ro.clone();
Box::new(Pool::new(Box::new(move || {
AssertUnwindSafe(RefCell::new(ProgramCacheInner::new(&ro)))
})))
}
}
#[derive(Clone, Copy, Debug)]
@ -1495,7 +1514,11 @@ enum MatchNfaType {
/// `ProgramCache` maintains reusable allocations for each matching engine
/// available to a particular program.
pub type ProgramCache = RefCell<ProgramCacheInner>;
///
/// We declare this as unwind safe since it's a cache that's only used for
/// performance purposes. If a panic occurs, it is (or should be) always safe
/// to continue using the same regex object.
pub type ProgramCache = AssertUnwindSafe<RefCell<ProgramCacheInner>>;
#[derive(Debug)]
pub struct ProgramCacheInner {
@ -1524,7 +1547,7 @@ impl ProgramCacheInner {
/// literals, and if so, returns them. Otherwise, this returns None.
#[cfg(feature = "perf-literal")]
fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
use syntax::hir::{HirKind, Literal};
use regex_syntax::hir::{HirKind, Literal};
// This is pretty hacky, but basically, if `is_alternation_literal` is
// true, then we can make several assumptions about the structure of our
@ -1576,7 +1599,7 @@ fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
mod test {
#[test]
fn uppercut_s_backtracking_bytes_default_bytes_mismatch() {
use internal::ExecBuilder;
use crate::internal::ExecBuilder;
let backtrack_bytes_re = ExecBuilder::new("^S")
.bounded_backtracking()
@ -1604,7 +1627,7 @@ mod test {
#[test]
fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() {
use internal::ExecBuilder;
use crate::internal::ExecBuilder;
let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)")
.bounded_backtracking()
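
The exec.rs hunks above replace the `thread_local`-backed `Cached`/`CachedGuard` pair with a `Pool`/`PoolGuard` of `ProgramCache` values. The following is only a rough mutex-based sketch of that ownership model (scratch values created on demand by a factory closure, handed back when the guard drops); the crate's actual `src/pool.rs` is more elaborate:

```rust
use std::sync::Mutex;

struct Pool<T> {
    create: Box<dyn Fn() -> T + Send + Sync>,
    stack: Mutex<Vec<T>>,
}

impl<T> Pool<T> {
    fn new(create: Box<dyn Fn() -> T + Send + Sync>) -> Pool<T> {
        Pool { create, stack: Mutex::new(vec![]) }
    }

    fn get(&self) -> PoolGuard<'_, T> {
        let value = self
            .stack
            .lock()
            .unwrap()
            .pop()
            .unwrap_or_else(|| (self.create)());
        PoolGuard { pool: self, value: Some(value) }
    }

    fn put(&self, value: T) {
        self.stack.lock().unwrap().push(value);
    }
}

struct PoolGuard<'a, T> {
    pool: &'a Pool<T>,
    value: Option<T>,
}

impl<'a, T> Drop for PoolGuard<'a, T> {
    fn drop(&mut self) {
        // Hand the scratch value back, mirroring the `put` pattern visible
        // at the top of this diff.
        if let Some(value) = self.value.take() {
            self.pool.put(value);
        }
    }
}

fn main() {
    let pool: Pool<Vec<u8>> = Pool::new(Box::new(|| Vec::with_capacity(64)));
    {
        let _scratch = pool.get(); // created by the factory on first use
    } // dropped here: the buffer goes back onto the free list
    let _scratch_again = pool.get(); // reuses the buffer instead of allocating
}
```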

63
third_party/rust/regex/src/expand.rs vendored

@ -1,12 +1,12 @@
use std::str;
use find_byte::find_byte;
use crate::find_byte::find_byte;
use re_bytes;
use re_unicode;
use crate::re_bytes;
use crate::re_unicode;
pub fn expand_str(
caps: &re_unicode::Captures,
caps: &re_unicode::Captures<'_>,
mut replacement: &str,
dst: &mut String,
) {
@ -24,7 +24,7 @@ pub fn expand_str(
continue;
}
debug_assert!(!replacement.is_empty());
let cap_ref = match find_cap_ref(replacement) {
let cap_ref = match find_cap_ref(replacement.as_bytes()) {
Some(cap_ref) => cap_ref,
None => {
dst.push_str("$");
@ -48,7 +48,7 @@ pub fn expand_str(
}
pub fn expand_bytes(
caps: &re_bytes::Captures,
caps: &re_bytes::Captures<'_>,
mut replacement: &[u8],
dst: &mut Vec<u8>,
) {
@ -125,19 +125,15 @@ impl From<usize> for Ref<'static> {
/// starting at the beginning of `replacement`.
///
/// If no such valid reference could be found, None is returned.
fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
replacement: &T,
) -> Option<CaptureRef> {
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
let mut i = 0;
let rep: &[u8] = replacement.as_ref();
if rep.len() <= 1 || rep[0] != b'$' {
return None;
}
let mut brace = false;
i += 1;
if rep[i] == b'{' {
brace = true;
i += 1;
return find_cap_ref_braced(rep, i + 1);
}
let mut cap_end = i;
while rep.get(cap_end).map_or(false, is_valid_cap_letter) {
@ -148,15 +144,10 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
}
// We just verified that the range 0..cap_end is valid ASCII, so it must
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
// check with either unsafe or by parsing the number straight from &[u8].
// check via an unchecked conversion or by parsing the number straight from
// &[u8].
let cap =
str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name");
if brace {
if !rep.get(cap_end).map_or(false, |&b| b == b'}') {
return None;
}
cap_end += 1;
}
Some(CaptureRef {
cap: match cap.parse::<u32>() {
Ok(i) => Ref::Number(i as usize),
@ -166,6 +157,31 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
})
}
fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> {
let start = i;
while rep.get(i).map_or(false, |&b| b != b'}') {
i += 1;
}
if !rep.get(i).map_or(false, |&b| b == b'}') {
return None;
}
// When looking at braced names, we don't put any restrictions on the name,
// so it's possible it could be invalid UTF-8. But a capture group name
// can never be invalid UTF-8, so if we have invalid UTF-8, then we can
// safely return None.
let cap = match str::from_utf8(&rep[start..i]) {
Err(_) => return None,
Ok(cap) => cap,
};
Some(CaptureRef {
cap: match cap.parse::<u32>() {
Ok(i) => Ref::Number(i as usize),
Err(_) => Ref::Named(cap),
},
end: i + 1,
})
}
/// Returns true if and only if the given byte is allowed in a capture name.
fn is_valid_cap_letter(b: &u8) -> bool {
match *b {
@ -182,13 +198,13 @@ mod tests {
($name:ident, $text:expr) => {
#[test]
fn $name() {
assert_eq!(None, find_cap_ref($text));
assert_eq!(None, find_cap_ref($text.as_bytes()));
}
};
($name:ident, $text:expr, $capref:expr) => {
#[test]
fn $name() {
assert_eq!(Some($capref), find_cap_ref($text));
assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
}
};
}
@ -204,7 +220,8 @@ mod tests {
find!(find_cap_ref3, "$0", c!(0, 2));
find!(find_cap_ref4, "$5", c!(5, 2));
find!(find_cap_ref5, "$10", c!(10, 3));
// see https://github.com/rust-lang/regex/pull/585 for more on characters following numbers
// See https://github.com/rust-lang/regex/pull/585
// for more on characters following numbers
find!(find_cap_ref6, "$42a", c!("42a", 4));
find!(find_cap_ref7, "${42}a", c!(42, 5));
find!(find_cap_ref8, "${42");
@ -217,4 +234,6 @@ mod tests {
find!(find_cap_ref15, "$1_$2", c!("1_", 3));
find!(find_cap_ref16, "$x-$y", c!("x", 2));
find!(find_cap_ref17, "$x_$y", c!("x_", 3));
find!(find_cap_ref18, "${#}", c!("#", 4));
find!(find_cap_ref19, "${Z[}", c!("Z[", 5));
}
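
The expand.rs hunks above split braced capture references into `find_cap_ref_braced`, matching the test cases added at the bottom of the file (e.g. `$x_$y` parses the name `x_`, while `${42}a` delimits the reference). A small usage sketch of the resulting replacement behavior, assuming the upgraded `regex` crate:

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(r"(?P<y>\d{4})-(?P<m>\d{2})").unwrap();
    // Braced references delimit the group name explicitly.
    assert_eq!(re.replace("2021-06", "${m}/${y}"), "06/2021");
    // Unbraced, the underscore is read as part of the name ("m_"), which
    // does not exist and therefore expands to the empty string.
    assert_eq!(re.replace("2021-06", "$m_$y"), "2021");
}
```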

18
third_party/rust/regex/src/input.rs vendored

@ -4,11 +4,9 @@ use std::fmt;
use std::ops;
use std::u32;
use syntax;
use literal::LiteralSearcher;
use prog::InstEmptyLook;
use utf8::{decode_last_utf8, decode_utf8};
use crate::literal::LiteralSearcher;
use crate::prog::InstEmptyLook;
use crate::utf8::{decode_last_utf8, decode_utf8};
/// Represents a location in the input.
#[derive(Clone, Copy, Debug)]
@ -175,7 +173,7 @@ impl<'t> Input for CharInput<'t> {
}
fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
use prog::EmptyLook::*;
use crate::prog::EmptyLook::*;
match empty.look {
StartLine => {
let c = self.previous_char(at);
@ -268,7 +266,7 @@ impl<'t> Input for ByteInput<'t> {
}
fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
use prog::EmptyLook::*;
use crate::prog::EmptyLook::*;
match empty.look {
StartLine => {
let c = self.previous_char(at);
@ -348,7 +346,7 @@ impl<'t> Input for ByteInput<'t> {
pub struct Char(u32);
impl fmt::Debug for Char {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match char::from_u32(self.0) {
None => write!(f, "Empty"),
Some(c) => write!(f, "{:?}", c),
@ -379,7 +377,7 @@ impl Char {
// available. However, our compiler ensures that if a Unicode word
// boundary is used, then the data must also be available. If it isn't,
// then the compiler returns an error.
char::from_u32(self.0).map_or(false, syntax::is_word_character)
char::from_u32(self.0).map_or(false, regex_syntax::is_word_character)
}
/// Returns true iff the byte is a word byte.
@ -387,7 +385,7 @@ impl Char {
/// If the byte is absent, then false is returned.
pub fn is_word_byte(self) -> bool {
match char::from_u32(self.0) {
Some(c) if c <= '\u{7F}' => syntax::is_word_byte(c as u8),
Some(c) if c <= '\u{7F}' => regex_syntax::is_word_byte(c as u8),
None | Some(_) => false,
}
}

106
third_party/rust/regex/src/lib.rs vendored

@ -22,12 +22,6 @@ used by adding `regex` to your dependencies in your project's `Cargo.toml`.
regex = "1"
```
If you're using Rust 2015, then you'll also need to add it to your crate root:
```rust
extern crate regex;
```
# Example: find a date
General use of regular expressions in this package involves compiling an
@ -68,9 +62,7 @@ regular expressions are compiled exactly once.
For example:
```rust
#[macro_use] extern crate lazy_static;
extern crate regex;
use lazy_static::lazy_static;
use regex::Regex;
fn some_helper_function(text: &str) -> bool {
@ -94,7 +86,7 @@ matches. For example, to find all dates in a string and be able to access
them by their component pieces:
```rust
# extern crate regex; use regex::Regex;
# use regex::Regex;
# fn main() {
let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
let text = "2012-03-14, 2013-01-01 and 2014-07-05";
@ -119,7 +111,7 @@ clearer, we can *name* our capture groups and use those names as variables
in our replacement text:
```rust
# extern crate regex; use regex::Regex;
# use regex::Regex;
# fn main() {
let re = Regex::new(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})").unwrap();
let before = "2012-03-14, 2013-01-01 and 2014-07-05";
@ -136,7 +128,7 @@ Note that if your regex gets complicated, you can use the `x` flag to
enable insignificant whitespace mode, which also lets you write comments:
```rust
# extern crate regex; use regex::Regex;
# use regex::Regex;
# fn main() {
let re = Regex::new(r"(?x)
(?P<y>\d{4}) # the year
@ -152,8 +144,9 @@ assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
```
If you wish to match against whitespace in this mode, you can still use `\s`,
`\n`, `\t`, etc. For escaping a single space character, you can use its hex
character code `\x20` or temporarily disable the `x` flag, e.g., `(?-x: )`.
`\n`, `\t`, etc. For escaping a single space character, you can escape it
directly with `\ `, use its hex character code `\x20` or temporarily disable
the `x` flag, e.g., `(?-x: )`.
# Example: match multiple regular expressions simultaneously
@ -216,7 +209,7 @@ Unicode scalar values. This means you can use Unicode characters directly
in your expression:
```rust
# extern crate regex; use regex::Regex;
# use regex::Regex;
# fn main() {
let re = Regex::new(r"(?i)Δ+").unwrap();
let mat = re.find("ΔδΔ").unwrap();
@ -243,7 +236,7 @@ of boolean properties are available as character classes. For example, you can
match a sequence of numerals, Greek or Cherokee letters:
```rust
# extern crate regex; use regex::Regex;
# use regex::Regex;
# fn main() {
let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap();
let mat = re.find("abcΔβγδⅡxyz").unwrap();
@ -252,7 +245,7 @@ assert_eq!((mat.start(), mat.end()), (3, 23));
```
For a more detailed breakdown of Unicode support with respect to
[UTS#18](http://unicode.org/reports/tr18/),
[UTS#18](https://unicode.org/reports/tr18/),
please see the
[UNICODE](https://github.com/rust-lang/regex/blob/master/UNICODE.md)
document in the root of the regex repository.
@ -364,7 +357,7 @@ $ the end of text (or end-of-line with multi-line mode)
<pre class="rust">
(exp) numbered capture group (indexed by opening parenthesis)
(?P&lt;name&gt;exp) named (also numbered) capture group (allowed chars: [_0-9a-zA-Z])
(?P&lt;name&gt;exp) named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
(?:exp) non-capturing group
(?flags) set flags within current group
(?flags:exp) set flags for exp (non-capturing)
@ -390,7 +383,7 @@ Flags can be toggled within a pattern. Here's an example that matches
case-insensitively for the first part but case-sensitively for the second part:
```rust
# extern crate regex; use regex::Regex;
# use regex::Regex;
# fn main() {
let re = Regex::new(r"(?i)a+(?-i)b+").unwrap();
let cap = re.captures("AaAaAbbBBBb").unwrap();
@ -424,7 +417,7 @@ Here is an example that uses an ASCII word boundary instead of a Unicode
word boundary:
```rust
# extern crate regex; use regex::Regex;
# use regex::Regex;
# fn main() {
let re = Regex::new(r"(?-u:\b).+(?-u:\b)").unwrap();
let cap = re.captures("$$abc$$").unwrap();
@ -454,7 +447,7 @@ assert_eq!(&cap[0], "abc");
## Perl character classes (Unicode friendly)
These classes are based on the definitions provided in
[UTS#18](http://www.unicode.org/reports/tr18/#Compatibility_Properties):
[UTS#18](https://www.unicode.org/reports/tr18/#Compatibility_Properties):
<pre class="rust">
\d digit (\p{Nd})
@ -522,11 +515,6 @@ All features below are enabled by default.
Enables all performance related features. This feature is enabled by default
and will always cover all features that improve performance, even if more
are added in the future.
* **perf-cache** -
Enables the use of very fast thread safe caching for internal match state.
When this is disabled, caching is still used, but with a slower and simpler
implementation. Disabling this drops the `thread_local` and `lazy_static`
dependencies.
* **perf-dfa** -
Enables the use of a lazy DFA for matching. The lazy DFA is used to compile
portions of a regex to a very fast DFA on an as-needed basis. This can
@ -541,6 +529,11 @@ All features below are enabled by default.
Enables the use of literal optimizations for speeding up matches. In some
cases, literal optimizations can result in speedups of _several_ orders of
magnitude. Disabling this drops the `aho-corasick` and `memchr` dependencies.
* **perf-cache** -
This feature used to enable a faster internal cache at the cost of using
additional dependencies, but this is no longer an option. A fast internal
cache is now used unconditionally with no additional dependencies. This may
change in the future.
### Unicode features
@ -561,7 +554,7 @@ All features below are enabled by default.
[Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches).
* **unicode-gencat** -
Provide the data for
[Uncode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
[Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
This includes, but is not limited to, `Decimal_Number`, `Letter`,
`Math_Symbol`, `Number` and `Punctuation`.
* **unicode-perl** -
@ -613,39 +606,30 @@ another matching engine with fixed memory requirements.
*/
#![deny(missing_docs)]
#![cfg_attr(test, deny(warnings))]
#![cfg_attr(feature = "pattern", feature(pattern))]
#![warn(missing_debug_implementations)]
#[cfg(not(feature = "std"))]
compile_error!("`std` feature is currently required to build this crate");
#[cfg(feature = "perf-literal")]
extern crate aho_corasick;
#[cfg(test)]
extern crate doc_comment;
#[cfg(feature = "perf-literal")]
extern crate memchr;
#[cfg(test)]
#[cfg_attr(feature = "perf-literal", macro_use)]
extern crate quickcheck;
extern crate regex_syntax as syntax;
#[cfg(feature = "perf-cache")]
extern crate thread_local;
#[cfg(test)]
doc_comment::doctest!("../README.md");
// To check README's example
// TODO: Re-enable this once the MSRV is 1.43 or greater.
// See: https://github.com/rust-lang/regex/issues/684
// See: https://github.com/rust-lang/regex/issues/685
// #[cfg(doctest)]
// doc_comment::doctest!("../README.md");
#[cfg(feature = "std")]
pub use error::Error;
pub use crate::error::Error;
#[cfg(feature = "std")]
pub use re_builder::set_unicode::*;
pub use crate::re_builder::set_unicode::*;
#[cfg(feature = "std")]
pub use re_builder::unicode::*;
pub use crate::re_builder::unicode::*;
#[cfg(feature = "std")]
pub use re_set::unicode::*;
pub use crate::re_set::unicode::*;
#[cfg(feature = "std")]
#[cfg(feature = "std")]
pub use re_unicode::{
pub use crate::re_unicode::{
escape, CaptureLocations, CaptureMatches, CaptureNames, Captures,
Locations, Match, Matches, NoExpand, Regex, Replacer, ReplacerRef, Split,
SplitN, SubCaptureMatches,
@ -730,8 +714,8 @@ Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the
literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that
matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when
enabled.
6. `.` matches any *byte* except for `\n` instead of any Unicode scalar value.
When the `s` flag is enabled, `.` matches any byte.
6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the
`s` flag is additionally enabled, `.` matches any byte.
# Performance
@ -740,14 +724,13 @@ performance on `&str`.
*/
#[cfg(feature = "std")]
pub mod bytes {
pub use re_builder::bytes::*;
pub use re_builder::set_bytes::*;
pub use re_bytes::*;
pub use re_set::bytes::*;
pub use crate::re_builder::bytes::*;
pub use crate::re_builder::set_bytes::*;
pub use crate::re_bytes::*;
pub use crate::re_set::bytes::*;
}
mod backtrack;
mod cache;
mod compile;
#[cfg(feature = "perf-dfa")]
mod dfa;
@ -755,13 +738,12 @@ mod error;
mod exec;
mod expand;
mod find_byte;
#[cfg(feature = "perf-literal")]
mod freqs;
mod input;
mod literal;
#[cfg(feature = "pattern")]
mod pattern;
mod pikevm;
mod pool;
mod prog;
mod re_builder;
mod re_bytes;
@ -777,9 +759,9 @@ mod utf8;
#[doc(hidden)]
#[cfg(feature = "std")]
pub mod internal {
pub use compile::Compiler;
pub use exec::{Exec, ExecBuilder};
pub use input::{Char, CharInput, Input, InputAt};
pub use literal::LiteralSearcher;
pub use prog::{EmptyLook, Inst, InstRanges, Program};
pub use crate::compile::Compiler;
pub use crate::exec::{Exec, ExecBuilder};
pub use crate::input::{Char, CharInput, Input, InputAt};
pub use crate::literal::LiteralSearcher;
pub use crate::prog::{EmptyLook, Inst, InstRanges, Program};
}
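
The lib.rs documentation above now notes that a literal space can be escaped directly with `\ ` in verbose (`x`) mode, alongside `\x20` and `(?-x: )`. A minimal sketch of that, assuming the upgraded `regex`/`regex-syntax` pair:

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(
        r"(?x)
        hello \   # `\ ` escapes a single literal space
        world
        ",
    )
    .unwrap();
    assert!(re.is_match("hello world"));
}
```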

783
third_party/rust/regex/src/literal/imp.rs vendored

@ -1,11 +1,8 @@
use std::cmp;
use std::mem;
use aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder};
use memchr::{memchr, memchr2, memchr3};
use syntax::hir::literal::{Literal, Literals};
use freqs::BYTE_FREQUENCIES;
use memchr::{memchr, memchr2, memchr3, memmem};
use regex_syntax::hir::literal::{Literal, Literals};
/// A prefix extracted from a compiled regular expression.
///
@ -15,8 +12,8 @@ use freqs::BYTE_FREQUENCIES;
#[derive(Clone, Debug)]
pub struct LiteralSearcher {
complete: bool,
lcp: FreqyPacked,
lcs: FreqyPacked,
lcp: Memmem,
lcs: Memmem,
matcher: Matcher,
}
@ -26,10 +23,8 @@ enum Matcher {
Empty,
/// A set of four or more single byte literals.
Bytes(SingleByteSet),
/// A single substring, find using memchr and frequency analysis.
FreqyPacked(FreqyPacked),
/// A single substring, find using Boyer-Moore.
BoyerMoore(BoyerMooreSearch),
/// A single substring, using vector accelerated routines when available.
Memmem(Memmem),
/// An Aho-Corasick automaton.
AC { ac: AhoCorasick<u32>, lits: Vec<Literal> },
/// A packed multiple substring searcher, using SIMD.
@ -63,8 +58,8 @@ impl LiteralSearcher {
let complete = lits.all_complete();
LiteralSearcher {
complete: complete,
lcp: FreqyPacked::new(lits.longest_common_prefix().to_vec()),
lcs: FreqyPacked::new(lits.longest_common_suffix().to_vec()),
lcp: Memmem::new(lits.longest_common_prefix()),
lcs: Memmem::new(lits.longest_common_suffix()),
matcher: matcher,
}
}
@ -72,7 +67,7 @@ impl LiteralSearcher {
/// Returns true if all matches comprise the entire regular expression.
///
/// This does not necessarily mean that a literal match implies a match
/// of the regular expression. For example, the regular expresison `^a`
/// of the regular expression. For example, the regular expression `^a`
/// is comprised of a single complete literal `a`, but the regular
/// expression demands that it only match at the beginning of a string.
pub fn complete(&self) -> bool {
@ -86,8 +81,7 @@ impl LiteralSearcher {
match self.matcher {
Empty => Some((0, 0)),
Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)),
FreqyPacked(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
BoyerMoore(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
Memmem(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
AC { ref ac, .. } => {
ac.find(haystack).map(|m| (m.start(), m.end()))
}
@ -124,24 +118,23 @@ impl LiteralSearcher {
}
/// Returns an iterator over all literals to be matched.
pub fn iter(&self) -> LiteralIter {
pub fn iter(&self) -> LiteralIter<'_> {
match self.matcher {
Matcher::Empty => LiteralIter::Empty,
Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense),
Matcher::FreqyPacked(ref s) => LiteralIter::Single(&s.pat),
Matcher::BoyerMoore(ref s) => LiteralIter::Single(&s.pattern),
Matcher::Memmem(ref s) => LiteralIter::Single(&s.finder.needle()),
Matcher::AC { ref lits, .. } => LiteralIter::AC(lits),
Matcher::Packed { ref lits, .. } => LiteralIter::Packed(lits),
}
}
/// Returns a matcher for the longest common prefix of this matcher.
pub fn lcp(&self) -> &FreqyPacked {
pub fn lcp(&self) -> &Memmem {
&self.lcp
}
/// Returns a matcher for the longest common suffix of this matcher.
pub fn lcs(&self) -> &FreqyPacked {
pub fn lcs(&self) -> &Memmem {
&self.lcs
}
@ -156,8 +149,7 @@ impl LiteralSearcher {
match self.matcher {
Empty => 0,
Bytes(ref sset) => sset.dense.len(),
FreqyPacked(_) => 1,
BoyerMoore(_) => 1,
Memmem(_) => 1,
AC { ref ac, .. } => ac.pattern_count(),
Packed { ref lits, .. } => lits.len(),
}
@ -169,8 +161,7 @@ impl LiteralSearcher {
match self.matcher {
Empty => 0,
Bytes(ref sset) => sset.approximate_size(),
FreqyPacked(ref single) => single.approximate_size(),
BoyerMoore(ref single) => single.approximate_size(),
Memmem(ref single) => single.approximate_size(),
AC { ref ac, .. } => ac.heap_bytes(),
Packed { ref s, .. } => s.heap_bytes(),
}
@ -205,12 +196,7 @@ impl Matcher {
return Matcher::Bytes(sset);
}
if lits.literals().len() == 1 {
let lit = lits.literals()[0].to_vec();
if BoyerMooreSearch::should_use(lit.as_slice()) {
return Matcher::BoyerMoore(BoyerMooreSearch::new(lit));
} else {
return Matcher::FreqyPacked(FreqyPacked::new(lit));
}
return Matcher::Memmem(Memmem::new(&lits.literals()[0]));
}
let pats = lits.literals().to_owned();
@ -232,6 +218,7 @@ impl Matcher {
}
}
#[derive(Debug)]
pub enum LiteralIter<'a> {
Empty,
Bytes(&'a [u8]),
@ -366,116 +353,27 @@ impl SingleByteSet {
}
}
/// Provides an implementation of fast subtring search using frequency
/// analysis.
/// A simple wrapper around the memchr crate's memmem implementation.
///
/// memchr is so fast that we do everything we can to keep the loop in memchr
/// for as long as possible. The easiest way to do this is to intelligently
/// pick the byte to send to memchr. The best byte is the byte that occurs
/// least frequently in the haystack. Since doing frequency analysis on the
/// haystack is far too expensive, we compute a set of fixed frequencies up
/// front and hard code them in src/freqs.rs. Frequency analysis is done via
/// scripts/frequencies.py.
/// The API this exposes mirrors the API of previous substring searchers that
/// this supplanted.
#[derive(Clone, Debug)]
pub struct FreqyPacked {
/// The pattern.
pat: Vec<u8>,
/// The number of Unicode characters in the pattern. This is useful for
/// determining the effective length of a pattern when deciding which
/// optimizations to perform. A trailing incomplete UTF-8 sequence counts
/// as one character.
pub struct Memmem {
finder: memmem::Finder<'static>,
char_len: usize,
/// The rarest byte in the pattern, according to pre-computed frequency
/// analysis.
rare1: u8,
/// The offset of the rarest byte in `pat`.
rare1i: usize,
/// The second rarest byte in the pattern, according to pre-computed
/// frequency analysis. (This may be equivalent to the rarest byte.)
///
/// The second rarest byte is used as a type of guard for quickly detecting
/// a mismatch after memchr locates an instance of the rarest byte. This
/// is a hedge against pathological cases where the pre-computed frequency
/// analysis may be off. (But of course, does not prevent *all*
/// pathological cases.)
rare2: u8,
/// The offset of the second rarest byte in `pat`.
rare2i: usize,
}
impl FreqyPacked {
fn new(pat: Vec<u8>) -> FreqyPacked {
if pat.is_empty() {
return FreqyPacked::empty();
}
// Find the rarest two bytes. Try to make them distinct (but it's not
// required).
let mut rare1 = pat[0];
let mut rare2 = pat[0];
for b in pat[1..].iter().cloned() {
if freq_rank(b) < freq_rank(rare1) {
rare1 = b;
}
}
for &b in &pat {
if rare1 == rare2 {
rare2 = b
} else if b != rare1 && freq_rank(b) < freq_rank(rare2) {
rare2 = b;
}
}
// And find the offsets of their last occurrences.
let rare1i = pat.iter().rposition(|&b| b == rare1).unwrap();
let rare2i = pat.iter().rposition(|&b| b == rare2).unwrap();
let char_len = char_len_lossy(&pat);
FreqyPacked {
pat: pat,
char_len: char_len,
rare1: rare1,
rare1i: rare1i,
rare2: rare2,
rare2i: rare2i,
}
}
fn empty() -> FreqyPacked {
FreqyPacked {
pat: vec![],
char_len: 0,
rare1: 0,
rare1i: 0,
rare2: 0,
rare2i: 0,
impl Memmem {
fn new(pat: &[u8]) -> Memmem {
Memmem {
finder: memmem::Finder::new(pat).into_owned(),
char_len: char_len_lossy(pat),
}
}
#[cfg_attr(feature = "perf-inline", inline(always))]
pub fn find(&self, haystack: &[u8]) -> Option<usize> {
let pat = &*self.pat;
if haystack.len() < pat.len() || pat.is_empty() {
return None;
}
let mut i = self.rare1i;
while i < haystack.len() {
i += match memchr(self.rare1, &haystack[i..]) {
None => return None,
Some(i) => i,
};
let start = i - self.rare1i;
let end = start + pat.len();
if end > haystack.len() {
return None;
}
let aligned = &haystack[start..end];
if aligned[self.rare2i] == self.rare2 && aligned == &*self.pat {
return Some(start);
}
i += 1;
}
None
self.finder.find(haystack)
}
#[cfg_attr(feature = "perf-inline", inline(always))]
@ -483,11 +381,11 @@ impl FreqyPacked {
if text.len() < self.len() {
return false;
}
text[text.len() - self.len()..] == *self.pat
&text[text.len() - self.len()..] == self.finder.needle()
}
pub fn len(&self) -> usize {
self.pat.len()
self.finder.needle().len()
}
pub fn char_len(&self) -> usize {
@ -495,627 +393,10 @@ impl FreqyPacked {
}
fn approximate_size(&self) -> usize {
self.pat.len() * mem::size_of::<u8>()
self.finder.needle().len() * mem::size_of::<u8>()
}
}
fn char_len_lossy(bytes: &[u8]) -> usize {
String::from_utf8_lossy(bytes).chars().count()
}
/// An implementation of Tuned Boyer-Moore as laid out by
/// Andrew Hume and Daniel Sunday in "Fast String Searching".
/// O(n) in the size of the input.
///
/// Fast string searching algorithms come in many variations,
/// but they can generally be described in terms of three main
/// components.
///
/// The skip loop is where the string searcher wants to spend
/// as much time as possible. Exactly which character in the
/// pattern the skip loop examines varies from algorithm to
/// algorithm, but in the simplest case this loop repeated
/// looks at the last character in the pattern and jumps
/// forward in the input if it is not in the pattern.
/// Robert Boyer and J Moore called this the "fast" loop in
/// their original paper.
///
/// The match loop is responsible for actually examining the
/// whole potentially matching substring. In order to fail
/// faster, the match loop sometimes has a guard test attached.
/// The guard test uses frequency analysis of the different
/// characters in the pattern to choose the least frequency
/// occurring character and use it to find match failures
/// as quickly as possible.
///
/// The shift rule governs how the algorithm will shuffle its
/// test window in the event of a failure during the match loop.
/// Certain shift rules allow the worst-case run time of the
/// algorithm to be shown to be O(n) in the size of the input
/// rather than O(nm) in the size of the input and the size
/// of the pattern (as naive Boyer-Moore is).
///
/// "Fast String Searching", in addition to presenting a tuned
/// algorithm, provides a comprehensive taxonomy of the many
/// different flavors of string searchers. Under that taxonomy
/// TBM, the algorithm implemented here, uses an unrolled fast
/// skip loop with memchr fallback, a forward match loop with guard,
/// and the mini Sunday's delta shift rule. To unpack that you'll have to
/// read the paper.
#[derive(Clone, Debug)]
pub struct BoyerMooreSearch {
/// The pattern we are going to look for in the haystack.
pattern: Vec<u8>,
/// The skip table for the skip loop.
///
/// Maps the character at the end of the input
/// to a shift.
skip_table: Vec<usize>,
/// The guard character (least frequently occurring char).
guard: u8,
/// The reverse-index of the guard character in the pattern.
guard_reverse_idx: usize,
/// Daniel Sunday's mini generalized delta2 shift table.
///
/// We use a skip loop, so we only have to provide a shift
/// for the skip char (last char). This is why it is a mini
/// shift rule.
md2_shift: usize,
}
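// Editor's note: the taxonomy described above (skip loop, guard test, match
// loop, shift rule) can be hard to pick out of the tuned implementation
// below, so here is a deliberately naive sketch of the same components. It
// is purely illustrative, is not part of this commit, and assumes only the
// `freq_rank` helper defined later in this module and a non-empty pattern.
fn naive_tbm_like_find(pattern: &[u8], haystack: &[u8]) -> Option<usize> {
    // Skip-loop byte: the last byte of the pattern.
    let last = *pattern.last().unwrap();
    // Guard: the least frequently occurring byte in the pattern.
    let (guard_idx, &guard) = pattern
        .iter()
        .enumerate()
        .min_by_key(|&(_, &b)| freq_rank(b))
        .unwrap();
    let mut window_end = pattern.len() - 1;
    while window_end < haystack.len() {
        // Skip loop: only examine the byte under the end of the window.
        if haystack[window_end] != last {
            window_end += 1;
            continue;
        }
        let start = window_end + 1 - pattern.len();
        // Guard test first, then the full match loop.
        if haystack[start + guard_idx] == guard
            && &haystack[start..start + pattern.len()] == pattern
        {
            return Some(start);
        }
        // Shift rule: the real implementation consults the skip and md2
        // tables here; this sketch just advances by one byte.
        window_end += 1;
    }
    None
}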
impl BoyerMooreSearch {
/// Create a new string searcher, performing whatever
/// compilation steps are required.
fn new(pattern: Vec<u8>) -> Self {
debug_assert!(!pattern.is_empty());
let (g, gi) = Self::select_guard(pattern.as_slice());
let skip_table = Self::compile_skip_table(pattern.as_slice());
let md2_shift = Self::compile_md2_shift(pattern.as_slice());
BoyerMooreSearch {
pattern: pattern,
skip_table: skip_table,
guard: g,
guard_reverse_idx: gi,
md2_shift: md2_shift,
}
}
/// Find the pattern in `haystack`, returning the offset
/// of the start of the first occurrence of the pattern
/// in `haystack`.
#[inline]
fn find(&self, haystack: &[u8]) -> Option<usize> {
if haystack.len() < self.pattern.len() {
return None;
}
let mut window_end = self.pattern.len() - 1;
// Inspired by the grep source. It is a way
// to do correct loop unrolling without having to place
// a crashpad of terminating characters at the end in
// the way described in the Fast String Searching paper.
const NUM_UNROLL: usize = 10;
// 1 for the initial position, and 1 for the md2 shift
let short_circuit = (NUM_UNROLL + 2) * self.pattern.len();
if haystack.len() > short_circuit {
// just 1 for the md2 shift
let backstop =
haystack.len() - ((NUM_UNROLL + 1) * self.pattern.len());
loop {
window_end =
match self.skip_loop(haystack, window_end, backstop) {
Some(i) => i,
None => return None,
};
if window_end >= backstop {
break;
}
if self.check_match(haystack, window_end) {
return Some(window_end - (self.pattern.len() - 1));
} else {
let skip = self.skip_table[haystack[window_end] as usize];
window_end +=
if skip == 0 { self.md2_shift } else { skip };
continue;
}
}
}
// now process the input after the backstop
while window_end < haystack.len() {
let mut skip = self.skip_table[haystack[window_end] as usize];
if skip == 0 {
if self.check_match(haystack, window_end) {
return Some(window_end - (self.pattern.len() - 1));
} else {
skip = self.md2_shift;
}
}
window_end += skip;
}
None
}
fn len(&self) -> usize {
return self.pattern.len();
}
/// The key heuristic behind which the BoyerMooreSearch lives.
///
/// See `rust-lang/regex/issues/408`.
///
/// Tuned Boyer-Moore is actually pretty slow! It turns out a handrolled
/// platform-specific memchr routine with a bit of frequency
/// analysis sprinkled on top actually wins most of the time.
/// However, there are a few cases where Tuned Boyer-Moore still
/// wins.
///
/// If the haystack is random, frequency analysis doesn't help us,
/// so Boyer-Moore will win for sufficiently large needles.
/// Unfortunately, there is no obvious way to determine this
/// ahead of time.
///
/// If the pattern itself consists of very common characters,
/// frequency analysis won't get us anywhere. The most extreme
/// example of this is a pattern like `eeeeeeeeeeeeeeee`. Fortunately,
/// this case is wholly determined by the pattern, so we can actually
/// implement the heuristic.
///
/// A third case is if the pattern is sufficiently long. The idea
/// here is that once the pattern gets long enough the Tuned
/// Boyer-Moore skip loop will start making strides long enough
/// to beat the asm deep magic that is memchr.
fn should_use(pattern: &[u8]) -> bool {
// The minimum pattern length required to use TBM.
const MIN_LEN: usize = 9;
// The minimum frequency rank (lower is rarer) that every byte in the
// pattern must have in order to use TBM. That is, if the pattern
// contains _any_ byte with a lower rank, then TBM won't be used.
const MIN_CUTOFF: usize = 150;
// The maximum frequency rank for any byte.
const MAX_CUTOFF: usize = 255;
// The scaling factor used to determine the actual cutoff frequency
// to use (keeping in mind that the minimum frequency rank is bounded
// by MIN_CUTOFF). This scaling factor is an attempt to make TBM more
// likely to be used as the pattern grows longer. That is, longer
// patterns permit somewhat less frequent bytes than shorter patterns,
// under the assumption that TBM gets better as the pattern gets
// longer.
const LEN_CUTOFF_PROPORTION: usize = 4;
let scaled_rank = pattern.len().wrapping_mul(LEN_CUTOFF_PROPORTION);
let cutoff = cmp::max(
MIN_CUTOFF,
MAX_CUTOFF - cmp::min(MAX_CUTOFF, scaled_rank),
);
// The pattern must be long enough to be worthwhile. e.g., memchr will
// be faster on `e` because it is short even though e is quite common.
pattern.len() > MIN_LEN
// all the bytes must be more common than the cutoff.
&& pattern.iter().all(|c| freq_rank(*c) >= cutoff)
}
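// Editor's note (worked example, not part of this commit): with the
// constants above, a 20 byte pattern gives scaled_rank = 20 * 4 = 80 and a
// cutoff of max(150, 255 - 80) = 175, so every byte must have a frequency
// rank of at least 175. A 30 byte pattern gives scaled_rank = 120 and a
// cutoff of max(150, 255 - 120) = 150, i.e. longer patterns tolerate
// somewhat more common bytes.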
/// Check to see if there is a match at the given position
#[inline]
fn check_match(&self, haystack: &[u8], window_end: usize) -> bool {
// guard test
if haystack[window_end - self.guard_reverse_idx] != self.guard {
return false;
}
// match loop
let window_start = window_end - (self.pattern.len() - 1);
for i in 0..self.pattern.len() {
if self.pattern[i] != haystack[window_start + i] {
return false;
}
}
true
}
/// Skip forward according to the shift table.
///
/// Returns the offset of the next occurrence
/// of the last char in the pattern, or `None`
/// if it never reappears. If `skip_loop` hits the backstop
/// it will leave early.
#[inline]
fn skip_loop(
&self,
haystack: &[u8],
mut window_end: usize,
backstop: usize,
) -> Option<usize> {
let window_end_snapshot = window_end;
let skip_of = |we: usize| -> usize {
// Unsafe might make this faster, but the benchmarks
// were hard to interpret.
self.skip_table[haystack[we] as usize]
};
loop {
let mut skip = skip_of(window_end);
window_end += skip;
skip = skip_of(window_end);
window_end += skip;
if skip != 0 {
skip = skip_of(window_end);
window_end += skip;
skip = skip_of(window_end);
window_end += skip;
skip = skip_of(window_end);
window_end += skip;
if skip != 0 {
skip = skip_of(window_end);
window_end += skip;
skip = skip_of(window_end);
window_end += skip;
skip = skip_of(window_end);
window_end += skip;
if skip != 0 {
skip = skip_of(window_end);
window_end += skip;
skip = skip_of(window_end);
window_end += skip;
// If ten iterations did not make at least 16 words
// worth of progress, we just fall back on memchr.
if window_end - window_end_snapshot
> 16 * mem::size_of::<usize>()
{
// Returning a window_end >= backstop will
// immediately break us out of the inner loop in
// `find`.
if window_end >= backstop {
return Some(window_end);
}
continue; // we made enough progress
} else {
// In case we are already there, and so that
// we will catch the guard char.
window_end = window_end
.checked_sub(1 + self.guard_reverse_idx)
.unwrap_or(0);
match memchr(self.guard, &haystack[window_end..]) {
None => return None,
Some(g_idx) => {
return Some(
window_end
+ g_idx
+ self.guard_reverse_idx,
);
}
}
}
}
}
}
return Some(window_end);
}
}
/// Compute the ufast skip table.
fn compile_skip_table(pattern: &[u8]) -> Vec<usize> {
let mut tab = vec![pattern.len(); 256];
// For every char in the pattern, we write a skip
// that will line us up with the rightmost occurrence.
//
// N.B. the sentinel (0) is written by the last
// loop iteration.
for (i, c) in pattern.iter().enumerate() {
tab[*c as usize] = (pattern.len() - 1) - i;
}
tab
}
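// Editor's note (worked example, not part of this commit): for the pattern
// b"abcab" the table starts out as [5; 256]; the loop then writes 'a' -> 1
// (the rightmost 'a' is one byte from the end), 'c' -> 2, and 'b' -> 0 (the
// sentinel, since 'b' is the last byte). Every other byte keeps a skip of 5.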
/// Select the guard character based off of the precomputed
/// frequency table.
fn select_guard(pattern: &[u8]) -> (u8, usize) {
let mut rarest = pattern[0];
let mut rarest_rev_idx = pattern.len() - 1;
for (i, c) in pattern.iter().enumerate() {
if freq_rank(*c) < freq_rank(rarest) {
rarest = *c;
rarest_rev_idx = (pattern.len() - 1) - i;
}
}
(rarest, rarest_rev_idx)
}
/// If there is another occurrence of the skip
/// char, shift to it, otherwise just shift to
/// the next window.
fn compile_md2_shift(pattern: &[u8]) -> usize {
let shiftc = *pattern.last().unwrap();
// For a pattern of length 1 we will never apply the
// shift rule, so we use a poison value on the principle
// that failing fast is a good thing.
if pattern.len() == 1 {
return 0xDEADBEAF;
}
let mut i = pattern.len() - 2;
while i > 0 {
if pattern[i] == shiftc {
return (pattern.len() - 1) - i;
}
i -= 1;
}
// The skip char never re-occurs in the pattern, so
// we can just shift the whole window length.
pattern.len() - 1
}
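// Editor's note (worked example, not part of this commit): for b"abcab" the
// skip char is the trailing 'b'; scanning right to left from index 3 finds
// the previous 'b' at index 1, so md2_shift = (5 - 1) - 1 = 3. For a
// pattern whose last byte never re-occurs, e.g. b"aaab", the loop falls
// through and the shift is the full pattern.len() - 1 = 3.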
fn approximate_size(&self) -> usize {
(self.pattern.len() * mem::size_of::<u8>())
+ (256 * mem::size_of::<usize>()) // skip table
}
}
fn freq_rank(b: u8) -> usize {
BYTE_FREQUENCIES[b as usize] as usize
}
#[cfg(test)]
mod tests {
use super::{BoyerMooreSearch, FreqyPacked};
//
// Unit Tests
//
// The "hello, world" of string searching
#[test]
fn bm_find_subs() {
let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..]));
let haystack = b"I keep seeing patterns in this text";
assert_eq!(14, searcher.find(haystack).unwrap());
}
#[test]
fn bm_find_no_subs() {
let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..]));
let haystack = b"I keep seeing needles in this text";
assert_eq!(None, searcher.find(haystack));
}
//
// Regression Tests
//
#[test]
fn bm_skip_reset_bug() {
let haystack = vec![0, 0, 0, 0, 0, 1, 1, 0];
let needle = vec![0, 1, 1, 0];
let searcher = BoyerMooreSearch::new(needle);
let offset = searcher.find(haystack.as_slice()).unwrap();
assert_eq!(4, offset);
}
#[test]
fn bm_backstop_underflow_bug() {
let haystack = vec![0, 0];
let needle = vec![0, 0];
let searcher = BoyerMooreSearch::new(needle);
let offset = searcher.find(haystack.as_slice()).unwrap();
assert_eq!(0, offset);
}
#[test]
fn bm_naive_off_by_one_bug() {
let haystack = vec![91];
let needle = vec![91];
let naive_offset = naive_find(&needle, &haystack).unwrap();
assert_eq!(0, naive_offset);
}
#[test]
fn bm_memchr_fallback_indexing_bug() {
let mut haystack = vec![
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];
let needle = vec![1, 1, 1, 1, 32, 32, 87];
let needle_start = haystack.len();
haystack.extend(needle.clone());
let searcher = BoyerMooreSearch::new(needle);
assert_eq!(needle_start, searcher.find(haystack.as_slice()).unwrap());
}
#[test]
fn bm_backstop_boundary() {
let haystack = b"\
// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
e_data.clone_created(entity_id, entity_to_add.entity_id);
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
"
.to_vec();
let needle = b"clone_created".to_vec();
let searcher = BoyerMooreSearch::new(needle);
let result = searcher.find(&haystack);
assert_eq!(Some(43), result);
}
#[test]
fn bm_win_gnu_indexing_bug() {
let haystack_raw = vec![
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];
let needle = vec![1, 1, 1, 1, 1, 1, 1];
let haystack = haystack_raw.as_slice();
BoyerMooreSearch::new(needle.clone()).find(haystack);
}
//
// QuickCheck Properties
//
use quickcheck::TestResult;
fn naive_find(needle: &[u8], haystack: &[u8]) -> Option<usize> {
assert!(needle.len() <= haystack.len());
for i in 0..(haystack.len() - (needle.len() - 1)) {
if haystack[i] == needle[0]
&& &haystack[i..(i + needle.len())] == needle
{
return Some(i);
}
}
None
}
quickcheck! {
fn qc_bm_equals_naive_find(pile1: Vec<u8>, pile2: Vec<u8>) -> TestResult {
if pile1.len() == 0 || pile2.len() == 0 {
return TestResult::discard();
}
let (needle, haystack) = if pile1.len() < pile2.len() {
(pile1, pile2.as_slice())
} else {
(pile2, pile1.as_slice())
};
let searcher = BoyerMooreSearch::new(needle.clone());
TestResult::from_bool(
searcher.find(haystack) == naive_find(&needle, haystack))
}
fn qc_bm_equals_single(pile1: Vec<u8>, pile2: Vec<u8>) -> TestResult {
if pile1.len() == 0 || pile2.len() == 0 {
return TestResult::discard();
}
let (needle, haystack) = if pile1.len() < pile2.len() {
(pile1, pile2.as_slice())
} else {
(pile2, pile1.as_slice())
};
let bm_searcher = BoyerMooreSearch::new(needle.clone());
let freqy_memchr = FreqyPacked::new(needle);
TestResult::from_bool(
bm_searcher.find(haystack) == freqy_memchr.find(haystack))
}
fn qc_bm_finds_trailing_needle(
haystack_pre: Vec<u8>,
needle: Vec<u8>
) -> TestResult {
if needle.len() == 0 {
return TestResult::discard();
}
let mut haystack = haystack_pre.clone();
let searcher = BoyerMooreSearch::new(needle.clone());
if haystack.len() >= needle.len() &&
searcher.find(haystack.as_slice()).is_some() {
return TestResult::discard();
}
haystack.extend(needle.clone());
// What if the tail of the haystack can start the
// needle?
let start = haystack_pre.len()
.checked_sub(needle.len())
.unwrap_or(0);
for i in 0..(needle.len() - 1) {
if searcher.find(&haystack[(i + start)..]).is_some() {
return TestResult::discard();
}
}
TestResult::from_bool(
searcher.find(haystack.as_slice())
.map(|x| x == haystack_pre.len())
.unwrap_or(false))
}
// qc_equals_* is only testing the negative case as @burntsushi
// pointed out in https://github.com/rust-lang/regex/issues/446.
// This quickcheck prop represents an effort to force testing of
// the positive case. qc_bm_finds_first and qc_bm_finds_trailing_needle
// already check some of the positive cases, but they don't cover
// cases where the needle is in the middle of haystack. This prop
// fills that hole.
fn qc_bm_finds_subslice(
haystack: Vec<u8>,
needle_start: usize,
needle_length: usize
) -> TestResult {
if haystack.len() == 0 {
return TestResult::discard();
}
let needle_start = needle_start % haystack.len();
let needle_length = needle_length % (haystack.len() - needle_start);
if needle_length == 0 {
return TestResult::discard();
}
let needle = &haystack[needle_start..(needle_start + needle_length)];
let bm_searcher = BoyerMooreSearch::new(needle.to_vec());
let start = naive_find(&needle, &haystack);
match start {
None => TestResult::from_bool(false),
Some(nf_start) =>
TestResult::from_bool(
nf_start <= needle_start
&& bm_searcher.find(&haystack) == start
)
}
}
fn qc_bm_finds_first(needle: Vec<u8>) -> TestResult {
if needle.len() == 0 {
return TestResult::discard();
}
let mut haystack = needle.clone();
let searcher = BoyerMooreSearch::new(needle.clone());
haystack.extend(needle);
TestResult::from_bool(
searcher.find(haystack.as_slice())
.map(|x| x == 0)
.unwrap_or(false))
}
}
}

2
third_party/rust/regex/src/literal/mod.rs vendored

@ -6,7 +6,7 @@ mod imp;
#[allow(missing_docs)]
#[cfg(not(feature = "perf-literal"))]
mod imp {
use syntax::hir::literal::Literals;
use regex_syntax::hir::literal::Literals;
#[derive(Clone, Debug)]
pub struct LiteralSearcher(());

3
third_party/rust/regex/src/pattern.rs vendored

@ -1,7 +1,8 @@
use std::str::pattern::{Pattern, SearchStep, Searcher};
use re_unicode::{Matches, Regex};
use crate::re_unicode::{Matches, Regex};
#[derive(Debug)]
pub struct RegexSearcher<'r, 't> {
haystack: &'t str,
it: Matches<'r, 't>,

16
third_party/rust/regex/src/pikevm.rs vendored

@ -8,7 +8,7 @@
//
// It can do more than the DFA can (specifically, record capture locations
// and execute Unicode word boundary assertions), but at a slower speed.
// Specifically, the Pike VM exectues a DFA implicitly by repeatedly expanding
// Specifically, the Pike VM executes a DFA implicitly by repeatedly expanding
// epsilon transitions. That is, the Pike VM engine can be in multiple states
// at once where as the DFA is only ever in one state at a time.
//
@ -17,11 +17,11 @@
use std::mem;
use exec::ProgramCache;
use input::{Input, InputAt};
use prog::{InstPtr, Program};
use re_trait::Slot;
use sparse::SparseSet;
use crate::exec::ProgramCache;
use crate::input::{Input, InputAt};
use crate::prog::{InstPtr, Program};
use crate::re_trait::Slot;
use crate::sparse::SparseSet;
/// An NFA simulation matching engine.
#[derive(Debug)]
@ -231,7 +231,7 @@ impl<'r, I: Input> Fsm<'r, I> {
at: InputAt,
at_next: InputAt,
) -> bool {
use prog::Inst::*;
use crate::prog::Inst::*;
match self.prog[ip] {
Match(match_slot) => {
if match_slot < matches.len() {
@ -300,7 +300,7 @@ impl<'r, I: Input> Fsm<'r, I> {
// traverse the set of states. We only push to the stack when we
// absolutely need recursion (restoring captures or following a
// branch).
use prog::Inst::*;
use crate::prog::Inst::*;
loop {
// Don't visit states we've already added.
if nlist.set.contains(ip) {

333
third_party/rust/regex/src/pool.rs vendored Normal file

@ -0,0 +1,333 @@
// This module provides a relatively simple thread-safe pool of reusable
// objects. For the most part, it's implemented by a stack represented by a
// Mutex<Vec<T>>. It has one small trick: because unlocking a mutex is somewhat
// costly, in the case where a pool is accessed by the first thread that tried
// to get a value, we bypass the mutex. Here are some benchmarks showing the
// difference.
//
// 1) misc::anchored_literal_long_non_match 21 (18571 MB/s)
// 2) misc::anchored_literal_long_non_match 107 (3644 MB/s)
// 3) misc::anchored_literal_long_non_match 45 (8666 MB/s)
// 4) misc::anchored_literal_long_non_match 19 (20526 MB/s)
//
// (1) represents our baseline: the master branch at the time of writing when
// using the 'thread_local' crate to implement the pool below.
//
// (2) represents a naive pool implemented completely via Mutex<Vec<T>>. There
// is no special trick for bypassing the mutex.
//
// (3) is the same as (2), except it uses Mutex<Vec<Box<T>>>. It is twice as
// fast because a Box<T> is much smaller than the T we use with a Pool in this
// crate. So pushing and popping a Box<T> from a Vec is quite a bit faster
// than for T.
//
// (4) is the same as (3), but with the trick for bypassing the mutex in the
// case of the first-to-get thread.
//
// Why move off of thread_local? Even though (4) is a hair faster than (1)
// above, this was not the main goal. The main goal was to move off of
// thread_local and find a way to *simply* re-capture some of its speed for
// regex's specific case. So again, why move off of it? The *primary* reason is
// because of memory leaks. See https://github.com/rust-lang/regex/issues/362
// for example. (Why do I want it to be simple? Well, I suppose what I mean is,
// "use as much safe code as possible to minimize risk and be as sure as I can
// be that it is correct.")
//
// My guess is that the thread_local design is probably not appropriate for
// regex since its memory usage scales to the number of active threads that
// have used a regex, whereas the pool below scales to the number of threads
// that simultaneously use a regex. While neither case permits contraction,
// since we own the pool data structure below, we can add contraction if a
// clear use case pops up in the wild. More pressingly though, it seems that
// there are at least some use case patterns where one might have many threads
// sitting around that might have used a regex at one point. While thread_local
// does try to reuse space previously used by a thread that has since stopped,
// its maximal memory usage still scales with the total number of active
// threads. In contrast, the pool below scales with the total number of threads
// *simultaneously* using the pool. The hope is that this uses less memory
// overall. And if it doesn't, we can hopefully tune it somehow.
//
// It seems that these sort of conditions happen frequently
// in FFI inside of other more "managed" languages. This was
// mentioned in the issue linked above, and also mentioned here:
// https://github.com/BurntSushi/rure-go/issues/3. And in particular, users
// confirm that disabling the use of thread_local resolves the leak.
//
// There were other weaker reasons for moving off of thread_local as well.
// Namely, at the time, I was looking to reduce dependencies. And for something
// like regex, maintenance can be simpler when we own the full dependency tree.
use std::panic::{RefUnwindSafe, UnwindSafe};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Mutex;
/// An atomic counter used to allocate thread IDs.
static COUNTER: AtomicUsize = AtomicUsize::new(1);
thread_local!(
/// A thread local used to assign an ID to a thread.
static THREAD_ID: usize = {
let next = COUNTER.fetch_add(1, Ordering::Relaxed);
// SAFETY: We cannot permit the reuse of thread IDs since reusing a
// thread ID might result in more than one thread "owning" a pool,
// and thus, permit accessing a mutable value from multiple threads
// simultaneously without synchronization. The intent of this panic is
// to be a sanity check. It is not expected that the thread ID space
// will actually be exhausted in practice.
//
// This checks that the counter never wraps around, since atomic
// addition wraps around on overflow.
if next == 0 {
panic!("regex: thread ID allocation space exhausted");
}
next
};
);
/// The type of the function used to create values in a pool when the pool is
/// empty and the caller requests one.
type CreateFn<T> =
Box<dyn Fn() -> T + Send + Sync + UnwindSafe + RefUnwindSafe + 'static>;
/// A simple thread safe pool for reusing values.
///
/// Getting a value out comes with a guard. When that guard is dropped, the
/// value is automatically put back in the pool.
///
/// A Pool<T> impls Sync when T is Send (even if it's not Sync). This means
/// that T can use interior mutability. This is possible because a pool is
/// guaranteed to provide a value to exactly one thread at any time.
///
/// Currently, a pool never contracts in size. Its size is proportional to the
/// number of simultaneous uses.
pub struct Pool<T> {
/// A stack of T values to hand out. These are used when a Pool is
/// accessed by a thread that didn't create it.
stack: Mutex<Vec<Box<T>>>,
/// A function to create more T values when stack is empty and a caller
/// has requested a T.
create: CreateFn<T>,
/// The ID of the thread that owns this pool. The owner is the thread
/// that makes the first call to 'get'. When the owner calls 'get', it
/// gets 'owner_val' directly instead of returning a T from 'stack'.
/// See comments elsewhere for details, but this is intended to be an
/// optimization for the common case that makes getting a T faster.
///
/// It is initialized to a value of zero (an impossible thread ID) as a
/// sentinel to indicate that it is unowned.
owner: AtomicUsize,
/// A value to return when the caller is in the same thread that created
/// the Pool.
owner_val: T,
}
// SAFETY: Since we want to use a Pool from multiple threads simultaneously
// behind an Arc, we need for it to be Sync. In cases where T is sync, Pool<T>
// would be Sync. However, since we use a Pool to store mutable scratch space,
// we wind up using a T that has interior mutability and is thus itself not
// Sync. So what we *really* want is for our Pool<T> to be Sync even when T is
// not Sync (but is at least Send).
//
// The only non-sync aspect of a Pool is its 'owner_val' field, which is used
// to implement faster access to a pool value in the common case of a pool
// being accessed in the same thread in which it was created. The 'stack' field
// is also shared, but a Mutex<T> where T: Send is already Sync. So we only
// need to worry about 'owner_val'.
//
// The key is to guarantee that 'owner_val' can only ever be accessed from one
// thread. In our implementation below, we guarantee this by only returning the
// 'owner_val' when the ID of the current thread matches the ID of the thread
// that created the Pool. Since this can only ever be one thread, it follows
// that only one thread can access 'owner_val' at any point in time. Thus, it
// is safe to declare that Pool<T> is Sync when T is Send.
//
// NOTE: It would also be possible to make the owning thread be the *first*
// thread that tries to get a value out of a Pool. However, the current
// implementation is a little simpler and it's not clear if making the first
// thread (rather than the creating thread) is meaningfully better.
//
// If there is a way to achieve our performance goals using safe code, then
// I would very much welcome a patch. As it stands, the implementation below
// tries to balance safety with performance. The case where a Regex is used
// from multiple threads simultaneously will suffer a bit since getting a cache
// will require unlocking a mutex.
unsafe impl<T: Send> Sync for Pool<T> {}
impl<T: ::std::fmt::Debug> ::std::fmt::Debug for Pool<T> {
fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
f.debug_struct("Pool")
.field("stack", &self.stack)
.field("owner", &self.owner)
.field("owner_val", &self.owner_val)
.finish()
}
}
/// A guard that is returned when a caller requests a value from the pool.
///
/// The purpose of the guard is to use RAII to automatically put the value back
/// in the pool once it's dropped.
#[derive(Debug)]
pub struct PoolGuard<'a, T: Send> {
/// The pool that this guard is attached to.
pool: &'a Pool<T>,
/// This is None when the guard represents the special "owned" value. In
/// which case, the value is retrieved from 'pool.owner_val'.
value: Option<Box<T>>,
}
impl<T: Send> Pool<T> {
/// Create a new pool. The given closure is used to create values in the
/// pool when necessary.
pub fn new(create: CreateFn<T>) -> Pool<T> {
let owner = AtomicUsize::new(0);
let owner_val = create();
Pool { stack: Mutex::new(vec![]), create, owner, owner_val }
}
/// Get a value from the pool. The caller is guaranteed to have exclusive
/// access to the given value.
///
/// Note that there is no guarantee provided about which value in the
/// pool is returned. That is, calling get, dropping the guard (causing
/// the value to go back into the pool) and then calling get again is NOT
/// guaranteed to return the same value received in the first get call.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub fn get(&self) -> PoolGuard<'_, T> {
// Our fast path checks if the caller is the thread that "owns" this
// pool. Or stated differently, whether it is the first thread that
// tried to extract a value from the pool. If it is, then we can return
// a T to the caller without going through a mutex.
//
// SAFETY: We must guarantee that only one thread gets access to this
// value. Since a thread is uniquely identified by the THREAD_ID thread
// local, it follows that if the caller's thread ID is equal to the
// owner, then only one thread may receive this value.
let caller = THREAD_ID.with(|id| *id);
let owner = self.owner.load(Ordering::Relaxed);
if caller == owner {
return self.guard_owned();
}
self.get_slow(caller, owner)
}
/// This is the "slow" version that goes through a mutex to pop an
/// allocated value off a stack to return to the caller. (Or, if the stack
/// is empty, a new value is created.)
///
/// If the pool has no owner, then this will set the owner.
#[cold]
fn get_slow(&self, caller: usize, owner: usize) -> PoolGuard<'_, T> {
use std::sync::atomic::Ordering::Relaxed;
if owner == 0 {
// The sentinel 0 value means this pool is not yet owned. We
// try to atomically set the owner. If we do, then this thread
// becomes the owner and we can return a guard that represents
// the special T for the owner.
let res = self.owner.compare_exchange(0, caller, Relaxed, Relaxed);
if res.is_ok() {
return self.guard_owned();
}
}
let mut stack = self.stack.lock().unwrap();
let value = match stack.pop() {
None => Box::new((self.create)()),
Some(value) => value,
};
self.guard_stack(value)
}
/// Puts a value back into the pool. Callers don't need to call this. Once
/// the guard that's returned by 'get' is dropped, it is put back into the
/// pool automatically.
fn put(&self, value: Box<T>) {
let mut stack = self.stack.lock().unwrap();
stack.push(value);
}
/// Create a guard that represents the special owned T.
fn guard_owned(&self) -> PoolGuard<'_, T> {
PoolGuard { pool: self, value: None }
}
/// Create a guard that contains a value from the pool's stack.
fn guard_stack(&self, value: Box<T>) -> PoolGuard<'_, T> {
PoolGuard { pool: self, value: Some(value) }
}
}
impl<'a, T: Send> PoolGuard<'a, T> {
/// Return the underlying value.
pub fn value(&self) -> &T {
match self.value {
None => &self.pool.owner_val,
Some(ref v) => &**v,
}
}
}
impl<'a, T: Send> Drop for PoolGuard<'a, T> {
#[cfg_attr(feature = "perf-inline", inline(always))]
fn drop(&mut self) {
if let Some(value) = self.value.take() {
self.pool.put(value);
}
}
}
#[cfg(test)]
mod tests {
use std::panic::{RefUnwindSafe, UnwindSafe};
use super::*;
#[test]
fn oibits() {
use crate::exec::ProgramCache;
fn has_oibits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
has_oibits::<Pool<ProgramCache>>();
}
// Tests that Pool implements the "single owner" optimization. That is, the
// thread that first accesses the pool gets its own copy, while all other
// threads get distinct copies.
#[test]
fn thread_owner_optimization() {
use std::cell::RefCell;
use std::sync::Arc;
let pool: Arc<Pool<RefCell<Vec<char>>>> =
Arc::new(Pool::new(Box::new(|| RefCell::new(vec!['a']))));
pool.get().value().borrow_mut().push('x');
let pool1 = pool.clone();
let t1 = std::thread::spawn(move || {
let guard = pool1.get();
let v = guard.value();
v.borrow_mut().push('y');
});
let pool2 = pool.clone();
let t2 = std::thread::spawn(move || {
let guard = pool2.get();
let v = guard.value();
v.borrow_mut().push('z');
});
t1.join().unwrap();
t2.join().unwrap();
// If we didn't implement the single owner optimization, then one of
// the threads above is likely to have mutated the [a, x] vec that
// we stuffed in the pool before spawning the threads. But since
// neither thread was first to access the pool, and because of the
// optimization, we should be guaranteed that neither thread mutates
// the special owned pool value.
//
// (Technically this is an implementation detail and not a contract of
// Pool's API.)
assert_eq!(vec!['a', 'x'], *pool.get().value().borrow());
}
}
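// Editor's note: a minimal usage sketch for the Pool above (not part of
// this commit). It assumes only the APIs defined in this file: Pool::new
// with a boxed create closure, get(), value(), and the guard's Drop impl.
fn pool_usage_sketch() {
    use std::cell::RefCell;

    // The pool hands out RefCell<Vec<u8>> scratch buffers.
    let pool: Pool<RefCell<Vec<u8>>> =
        Pool::new(Box::new(|| RefCell::new(Vec::<u8>::new())));
    {
        let guard = pool.get();
        guard.value().borrow_mut().extend_from_slice(b"scratch");
        // Dropping `guard` here puts the buffer back into the pool (or, for
        // the owning thread, leaves it in `owner_val`).
    }
    // A later get() may hand back the same buffer, but the API makes no
    // guarantee about which value is returned.
    let _again = pool.get();
}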

25
third_party/rust/regex/src/prog.rs vendored

@ -6,8 +6,8 @@ use std::ops::Deref;
use std::slice;
use std::sync::Arc;
use input::Char;
use literal::LiteralSearcher;
use crate::input::Char;
use crate::literal::LiteralSearcher;
/// `InstPtr` represents the index of an instruction in a regex program.
pub type InstPtr = usize;
@ -168,7 +168,7 @@ impl Deref for Program {
}
impl fmt::Debug for Program {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use self::Inst::*;
fn with_goto(cur: usize, goto: usize, fmtd: String) -> String {
@ -259,8 +259,8 @@ impl<'a> IntoIterator for &'a Program {
///
/// Other than the benefit of moving invariants into the type system, another
/// benefit is the decreased size. If we remove the `Char` and `Ranges`
/// instructions from the `Inst` enum, then its size shrinks from 40 bytes to
/// 24 bytes. (This is because of the removal of a `Vec` in the `Ranges`
/// instructions from the `Inst` enum, then its size shrinks from 32 bytes to
/// 24 bytes. (This is because of the removal of a `Box<[]>` in the `Ranges`
/// variant.) Given that byte based machines are typically much bigger than
/// their Unicode analogues (because they can decode UTF-8 directly), this ends
/// up being a pretty significant savings.
@ -374,7 +374,7 @@ pub struct InstRanges {
/// succeeds.
pub goto: InstPtr,
/// The set of Unicode scalar value ranges to test.
pub ranges: Vec<(char, char)>,
pub ranges: Box<[(char, char)]>,
}
impl InstRanges {
@ -432,3 +432,16 @@ impl InstBytes {
self.start <= byte && byte <= self.end
}
}
#[cfg(test)]
mod test {
#[test]
#[cfg(target_pointer_width = "64")]
fn test_size_of_inst() {
use std::mem::size_of;
use super::Inst;
assert_eq!(32, size_of::<Inst>());
}
}

14
third_party/rust/regex/src/re_builder.rs vendored

@ -37,16 +37,17 @@ macro_rules! define_builder {
($name:ident, $regex_mod:ident, $only_utf8:expr) => {
pub mod $name {
use super::RegexOptions;
use error::Error;
use exec::ExecBuilder;
use crate::error::Error;
use crate::exec::ExecBuilder;
use $regex_mod::Regex;
use crate::$regex_mod::Regex;
/// A configurable builder for a regular expression.
///
/// A builder can be used to configure how the regex is built, for example, by
/// setting the default flags (which can be overridden in the expression
/// itself) or setting various limits.
#[derive(Debug)]
pub struct RegexBuilder(RegexOptions);
impl RegexBuilder {
@ -234,16 +235,17 @@ macro_rules! define_set_builder {
($name:ident, $regex_mod:ident, $only_utf8:expr) => {
pub mod $name {
use super::RegexOptions;
use error::Error;
use exec::ExecBuilder;
use crate::error::Error;
use crate::exec::ExecBuilder;
use re_set::$regex_mod::RegexSet;
use crate::re_set::$regex_mod::RegexSet;
/// A configurable builder for a set of regular expressions.
///
/// A builder can be used to configure how the regexes are built, for example,
/// by setting the default flags (which can be overridden in the expression
/// itself) or setting various limits.
#[derive(Debug)]
pub struct RegexSetBuilder(RegexOptions);
impl RegexSetBuilder {

179
third_party/rust/regex/src/re_bytes.rs vendored

@ -1,17 +1,18 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt;
use std::iter::FusedIterator;
use std::ops::{Index, Range};
use std::str::FromStr;
use std::sync::Arc;
use find_byte::find_byte;
use crate::find_byte::find_byte;
use error::Error;
use exec::{Exec, ExecNoSync};
use expand::expand_bytes;
use re_builder::bytes::RegexBuilder;
use re_trait::{self, RegularExpression, SubCapturesPosIter};
use crate::error::Error;
use crate::exec::{Exec, ExecNoSync};
use crate::expand::expand_bytes;
use crate::re_builder::bytes::RegexBuilder;
use crate::re_trait::{self, RegularExpression, SubCapturesPosIter};
/// Match represents a single match of a regex in a haystack.
///
@ -78,14 +79,14 @@ pub struct Regex(Exec);
impl fmt::Display for Regex {
/// Shows the original regular expression.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.as_str())
}
}
impl fmt::Debug for Regex {
/// Shows the original regular expression.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self, f)
}
}
@ -119,7 +120,8 @@ impl Regex {
RegexBuilder::new(re).build()
}
/// Returns true if and only if the regex matches the string given.
/// Returns true if and only if there is a match for the regex in the
/// string given.
///
/// It is recommended to use this method if all you need to do is test
/// a match, since the underlying matching engine may be able to do less
@ -131,7 +133,7 @@ impl Regex {
/// bytes:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let text = b"I categorically deny having triskaidekaphobia.";
/// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
@ -154,7 +156,7 @@ impl Regex {
/// ASCII word bytes:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let text = b"I categorically deny having triskaidekaphobia.";
/// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap();
@ -175,7 +177,7 @@ impl Regex {
/// word bytes:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let text = b"Retroactively relinquishing remunerations is reprehensible.";
/// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
@ -203,7 +205,7 @@ impl Regex {
/// year separately.
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
/// let text = b"Not my favorite movie: 'Citizen Kane' (1941).";
@ -225,7 +227,7 @@ impl Regex {
/// We can make this example a bit clearer by using *named* capture groups:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
@ -269,7 +271,7 @@ impl Regex {
/// some text, where the movie is formatted like "'Title' (xxxx)":
///
/// ```rust
/// # extern crate regex; use std::str; use regex::bytes::Regex;
/// # use std::str; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
@ -303,7 +305,7 @@ impl Regex {
/// To split a string delimited by arbitrary amounts of spaces or tabs:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"[ \t]+").unwrap();
/// let fields: Vec<&[u8]> = re.split(b"a b \t c\td e").collect();
@ -329,7 +331,7 @@ impl Regex {
/// Get the first two words in some text:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"\W+").unwrap();
/// let fields: Vec<&[u8]> = re.splitn(b"Hey! How are you?", 3).collect();
@ -377,7 +379,7 @@ impl Regex {
/// In typical usage, this can just be a normal byte string:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new("[^01]+").unwrap();
/// assert_eq!(re.replace(b"1078910", &b""[..]), &b"1010"[..]);
@ -390,7 +392,7 @@ impl Regex {
/// group matches easily:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # use regex::bytes::Captures; fn main() {
/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
/// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| {
@ -409,7 +411,7 @@ impl Regex {
/// with named capture groups:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
/// let result = re.replace(b"Springsteen, Bruce", &b"$first $last"[..]);
@ -426,7 +428,7 @@ impl Regex {
/// underscore:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
/// let result = re.replace(b"deep fried", &b"${first}_$second"[..]);
@ -443,7 +445,7 @@ impl Regex {
/// byte string with `NoExpand`:
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// use regex::bytes::NoExpand;
///
@ -544,7 +546,7 @@ impl Regex {
/// `a`.
///
/// ```rust
/// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Regex;
/// # fn main() {
/// let text = b"aaaaa";
/// let pos = Regex::new(r"a+").unwrap().shortest_match(text);
@ -656,7 +658,7 @@ impl Regex {
}
/// Returns an iterator over the capture names.
pub fn capture_names(&self) -> CaptureNames {
pub fn capture_names(&self) -> CaptureNames<'_> {
CaptureNames(self.0.capture_names().iter())
}
@ -689,6 +691,7 @@ impl Regex {
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched byte string.
#[derive(Debug)]
pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSync<'r>>);
impl<'r, 't> Iterator for Matches<'r, 't> {
@ -700,6 +703,8 @@ impl<'r, 't> Iterator for Matches<'r, 't> {
}
}
impl<'r, 't> FusedIterator for Matches<'r, 't> {}
/// An iterator that yields all non-overlapping capture groups matching a
/// particular regular expression.
///
@ -707,6 +712,7 @@ impl<'r, 't> Iterator for Matches<'r, 't> {
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched byte string.
#[derive(Debug)]
pub struct CaptureMatches<'r, 't>(
re_trait::CaptureMatches<'t, ExecNoSync<'r>>,
);
@ -723,10 +729,13 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
}
}
impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {}
/// Yields all substrings delimited by a regular expression match.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the byte string being split.
#[derive(Debug)]
pub struct Split<'r, 't> {
finder: Matches<'r, 't>,
last: usize,
@ -756,12 +765,15 @@ impl<'r, 't> Iterator for Split<'r, 't> {
}
}
impl<'r, 't> FusedIterator for Split<'r, 't> {}
/// Yields at most `N` substrings delimited by a regular expression match.
///
/// The last substring will be whatever remains after splitting.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the byte string being split.
#[derive(Debug)]
pub struct SplitN<'r, 't> {
splits: Split<'r, 't>,
n: usize,
@ -789,14 +801,21 @@ impl<'r, 't> Iterator for SplitN<'r, 't> {
Some(&text[self.splits.last..])
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.n))
}
}
impl<'r, 't> FusedIterator for SplitN<'r, 't> {}
/// An iterator over the names of all possible captures.
///
/// `None` indicates an unnamed capture; the first element (capture 0, the
/// whole matched region) is always unnamed.
///
/// `'r` is the lifetime of the compiled regular expression.
#[derive(Clone, Debug)]
pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);
impl<'r> Iterator for CaptureNames<'r> {
@ -812,8 +831,16 @@ impl<'r> Iterator for CaptureNames<'r> {
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
fn count(self) -> usize {
self.0.count()
}
}
impl<'r> ExactSizeIterator for CaptureNames<'r> {}
impl<'r> FusedIterator for CaptureNames<'r> {}
/// CaptureLocations is a low level representation of the raw offsets of each
/// submatch.
///
@ -930,17 +957,22 @@ impl<'t> Captures<'t> {
/// Expands all instances of `$name` in `replacement` to the corresponding
/// capture group `name`, and writes them to the `dst` buffer given.
///
/// `name` may be an integer corresponding to the index of the
/// capture group (counted by order of opening parenthesis where `0` is the
/// `name` may be an integer corresponding to the index of the capture
/// group (counted by order of opening parenthesis where `0` is the
/// entire match) or it can be a name (consisting of letters, digits or
/// underscores) corresponding to a named capture group.
///
/// If `name` isn't a valid capture group (whether the name doesn't exist
/// or isn't a valid index), then it is replaced with the empty string.
///
/// The longest possible name is used. e.g., `$1a` looks up the capture
/// group named `1a` and not the capture group at index `1`. To exert more
/// precise control over the name, use braces, e.g., `${1}a`.
/// The longest possible name consisting of the characters `[_0-9A-Za-z]`
/// is used. e.g., `$1a` looks up the capture group named `1a` and not the
/// capture group at index `1`. To exert more precise control over the
/// name, or to refer to a capture group name that uses characters outside
/// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
/// using braces, any sequence of valid UTF-8 bytes is permitted. If the
/// sequence does not refer to a capture group name in the corresponding
/// regex, then it is replaced with an empty string.
///
/// To write a literal `$` use `$$`.
pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) {
@ -958,15 +990,15 @@ impl<'t> Captures<'t> {
}
impl<'t> fmt::Debug for Captures<'t> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_tuple("Captures").field(&CapturesDebug(self)).finish()
}
}
struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);
struct CapturesDebug<'c, 't>(&'c Captures<'t>);
impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn escape_bytes(bytes: &[u8]) -> String {
let mut s = String::new();
for &b in bytes {
@ -1051,7 +1083,8 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
///
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
pub struct SubCaptureMatches<'c, 't: 'c> {
#[derive(Clone, Debug)]
pub struct SubCaptureMatches<'c, 't> {
caps: &'c Captures<'t>,
it: SubCapturesPosIter<'c>,
}
@ -1066,13 +1099,15 @@ impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
}
}
impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {}
/// Replacer describes types that can be used to replace matches in a byte
/// string.
///
/// In general, users of this crate shouldn't need to implement this trait,
/// since implementations are already provided for `&[u8]` and
/// `FnMut(&Captures) -> Vec<u8>` (or any `FnMut(&Captures) -> T`
/// where `T: AsRef<[u8]>`), which covers most use cases.
/// since implementations are already provided for `&[u8]` along with other
/// variants of bytes types and `FnMut(&Captures) -> Vec<u8>` (or any
/// `FnMut(&Captures) -> T` where `T: AsRef<[u8]>`), which covers most use cases.
pub trait Replacer {
/// Appends text to `dst` to replace the current match.
///
@ -1081,7 +1116,7 @@ pub trait Replacer {
///
/// For example, a no-op replacement would be
/// `dst.extend(&caps[0])`.
fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>);
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>);
/// Return a fixed unchanging replacement byte string.
///
@ -1124,10 +1159,10 @@ pub trait Replacer {
///
/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
#[derive(Debug)]
pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R);
impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
self.0.replace_append(caps, dst)
}
fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
@ -1136,24 +1171,69 @@ impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
}
impl<'a> Replacer for &'a [u8] {
fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
caps.expand(*self, dst);
}
fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
match find_byte(b'$', *self) {
Some(_) => None,
None => Some(Cow::Borrowed(*self)),
}
fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
no_expansion(self)
}
}
impl<'a> Replacer for &'a Vec<u8> {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
caps.expand(*self, dst);
}
fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
no_expansion(self)
}
}
impl Replacer for Vec<u8> {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
caps.expand(self, dst);
}
fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
no_expansion(self)
}
}
impl<'a> Replacer for Cow<'a, [u8]> {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
caps.expand(self.as_ref(), dst);
}
fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
no_expansion(self)
}
}
impl<'a> Replacer for &'a Cow<'a, [u8]> {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
caps.expand(self.as_ref(), dst);
}
fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
no_expansion(self)
}
}
fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<'_, [u8]>> {
let s = t.as_ref();
match find_byte(b'$', s) {
Some(_) => None,
None => Some(Cow::Borrowed(s)),
}
}
impl<F, T> Replacer for F
where
F: FnMut(&Captures) -> T,
F: FnMut(&Captures<'_>) -> T,
T: AsRef<[u8]>,
{
fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
dst.extend_from_slice((*self)(caps).as_ref());
}
}
@ -1166,14 +1246,15 @@ where
/// and performant (since capture groups don't need to be found).
///
/// `'t` is the lifetime of the literal text.
#[derive(Clone, Debug)]
pub struct NoExpand<'t>(pub &'t [u8]);
impl<'t> Replacer for NoExpand<'t> {
fn replace_append(&mut self, _: &Captures, dst: &mut Vec<u8>) {
fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) {
dst.extend_from_slice(self.0);
}
fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
Some(Cow::Borrowed(self.0))
}
}
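// Editor's note: a small standalone illustration of the `$name` / `${name}`
// expansion syntax documented for `expand` above (not part of this commit).
// It uses only the public bytes API already shown in the doc examples in
// this file.
fn expand_sketch() {
    use regex::bytes::Regex;

    let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
    let caps = re.captures(b"Springsteen, Bruce").unwrap();
    let mut dst = Vec::new();
    // `$first` and `$last` name capture groups; the braced form `${first}`
    // delimits the name explicitly, which matters when a character from
    // `[_0-9A-Za-z]` immediately follows the group name.
    caps.expand(b"${first} $last", &mut dst);
    assert_eq!(&dst[..], &b"Bruce Springsteen"[..]);
}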

39
third_party/rust/regex/src/re_set.rs vendored

@ -7,10 +7,10 @@ macro_rules! define_set {
use std::slice;
use std::vec;
use error::Error;
use exec::Exec;
use re_builder::$builder_mod::RegexSetBuilder;
use re_trait::RegularExpression;
use crate::error::Error;
use crate::exec::Exec;
use crate::re_builder::$builder_mod::RegexSetBuilder;
use crate::re_trait::RegularExpression;
/// Match multiple (possibly overlapping) regular expressions in a single scan.
///
@ -43,7 +43,7 @@ $(#[$doc_regexset_example])*
/// Note that it would be possible to adapt the above example to using `Regex`
/// with an expression like:
///
/// ```ignore
/// ```text
/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
/// ```
///
@ -96,6 +96,19 @@ impl RegexSet {
RegexSetBuilder::new(exprs).build()
}
/// Create a new empty regex set.
///
/// # Example
///
/// ```rust
/// # use regex::RegexSet;
/// let set = RegexSet::empty();
/// assert!(set.is_empty());
/// ```
pub fn empty() -> RegexSet {
RegexSetBuilder::new(&[""; 0]).build().unwrap()
}
/// Returns true if and only if one of the regexes in this set matches
/// the text given.
///
@ -207,6 +220,11 @@ impl RegexSet {
self.0.regex_strings().len()
}
/// Returns `true` if this set contains no regular expressions.
pub fn is_empty(&self) -> bool {
self.0.regex_strings().is_empty()
}
/// Returns the patterns that this set will match on.
///
/// This function can be used to determine the pattern for a match. The
@ -274,7 +292,7 @@ impl SetMatches {
/// This will always produce matches in ascending order of index, where
/// the index corresponds to the index of the regex that matched with
/// respect to its position when initially building the set.
pub fn iter(&self) -> SetMatchesIter {
pub fn iter(&self) -> SetMatchesIter<'_> {
SetMatchesIter((&*self.matches).into_iter().enumerate())
}
}
@ -302,6 +320,7 @@ impl<'a> IntoIterator for &'a SetMatches {
/// This will always produce matches in ascending order of index, where the
/// index corresponds to the index of the regex that matched with respect to
/// its position when initially building the set.
#[derive(Debug)]
pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>);
impl Iterator for SetMatchesIntoIter {
@ -334,6 +353,8 @@ impl DoubleEndedIterator for SetMatchesIntoIter {
}
}
impl iter::FusedIterator for SetMatchesIntoIter {}
/// A borrowed iterator over the set of matches from a regex set.
///
/// The lifetime `'a` refers to the lifetime of a `SetMatches` value.
@ -341,7 +362,7 @@ impl DoubleEndedIterator for SetMatchesIntoIter {
/// This will always produce matches in ascending order of index, where the
/// index corresponds to the index of the regex that matched with respect to
/// its position when initially building the set.
#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>);
impl<'a> Iterator for SetMatchesIter<'a> {
@ -374,6 +395,8 @@ impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
}
}
impl<'a> iter::FusedIterator for SetMatchesIter<'a> {}
#[doc(hidden)]
impl From<Exec> for RegexSet {
fn from(exec: Exec) -> Self {
@ -382,7 +405,7 @@ impl From<Exec> for RegexSet {
}
impl fmt::Debug for RegexSet {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "RegexSet({:?})", self.0.regex_strings())
}
}

32
third_party/rust/regex/src/re_trait.rs vendored

@ -1,3 +1,6 @@
use std::fmt;
use std::iter::FusedIterator;
/// Slot is a single saved capture location. Note that there are two slots for
/// every capture in a regular expression (one slot each for the start and end
/// of the capture).
@ -27,7 +30,7 @@ impl Locations {
/// Creates an iterator of all the capture group positions in order of
/// appearance in the regular expression. Positions are byte indices
/// in terms of the original string matched.
pub fn iter(&self) -> SubCapturesPosIter {
pub fn iter(&self) -> SubCapturesPosIter<'_> {
SubCapturesPosIter { idx: 0, locs: self }
}
@ -51,6 +54,7 @@ impl Locations {
/// Positions are byte indices in terms of the original string matched.
///
/// `'c` is the lifetime of the captures.
#[derive(Clone, Debug)]
pub struct SubCapturesPosIter<'c> {
idx: usize,
locs: &'c Locations,
@ -72,6 +76,8 @@ impl<'c> Iterator for SubCapturesPosIter<'c> {
}
}
impl<'c> FusedIterator for SubCapturesPosIter<'c> {}
/// `RegularExpression` describes types that can implement regex searching.
///
/// This trait is my attempt at reducing code duplication and to standardize
@ -84,9 +90,9 @@ impl<'c> Iterator for SubCapturesPosIter<'c> {
/// somewhat reasonable. One particular thing this trait would expose would be
/// the ability to start the search of a regex anywhere in a haystack, which
/// isn't possible in the current public API.
pub trait RegularExpression: Sized {
pub trait RegularExpression: Sized + fmt::Debug {
/// The type of the haystack.
type Text: ?Sized;
type Text: ?Sized + fmt::Debug;
/// The number of capture slots in the compiled regular expression. This is
/// always two times the number of capture groups (two slots per group).
@ -132,18 +138,19 @@ pub trait RegularExpression: Sized {
/// Returns an iterator over all non-overlapping successive leftmost-first
/// matches.
fn find_iter(self, text: &Self::Text) -> Matches<Self> {
fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> {
Matches { re: self, text: text, last_end: 0, last_match: None }
}
/// Returns an iterator over all non-overlapping successive leftmost-first
/// matches with captures.
fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> {
fn captures_iter(self, text: &Self::Text) -> CaptureMatches<'_, Self> {
CaptureMatches(self.find_iter(text))
}
}
/// An iterator over all non-overlapping successive leftmost-first matches.
#[derive(Debug)]
pub struct Matches<'t, R>
where
R: RegularExpression,
@ -204,8 +211,16 @@ where
}
}
impl<'t, R> FusedIterator for Matches<'t, R>
where
R: RegularExpression,
R::Text: 't + AsRef<[u8]>,
{
}
/// An iterator over all non-overlapping successive leftmost-first matches with
/// captures.
#[derive(Debug)]
pub struct CaptureMatches<'t, R>(Matches<'t, R>)
where
R: RegularExpression,
@ -259,3 +274,10 @@ where
Some(locs)
}
}
impl<'t, R> FusedIterator for CaptureMatches<'t, R>
where
R: RegularExpression,
R::Text: 't + AsRef<[u8]>,
{
}

186
third_party/rust/regex/src/re_unicode.rs vendored

@ -1,25 +1,25 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt;
use std::iter::FusedIterator;
use std::ops::{Index, Range};
use std::str::FromStr;
use std::sync::Arc;
use find_byte::find_byte;
use syntax;
use crate::find_byte::find_byte;
use error::Error;
use exec::{Exec, ExecNoSyncStr};
use expand::expand_str;
use re_builder::unicode::RegexBuilder;
use re_trait::{self, RegularExpression, SubCapturesPosIter};
use crate::error::Error;
use crate::exec::{Exec, ExecNoSyncStr};
use crate::expand::expand_str;
use crate::re_builder::unicode::RegexBuilder;
use crate::re_trait::{self, RegularExpression, SubCapturesPosIter};
/// Escapes all regular expression meta characters in `text`.
///
/// The string returned may be safely used as a literal in a regular
/// expression.
pub fn escape(text: &str) -> String {
syntax::escape(text)
regex_syntax::escape(text)
}
/// Match represents a single match of a regex in a haystack.
@ -137,14 +137,14 @@ pub struct Regex(Exec);
impl fmt::Display for Regex {
/// Shows the original regular expression.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.as_str())
}
}
impl fmt::Debug for Regex {
/// Shows the original regular expression.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self, f)
}
}
@ -175,7 +175,8 @@ impl Regex {
RegexBuilder::new(re).build()
}
/// Returns true if and only if the regex matches the string given.
/// Returns true if and only if there is a match for the regex in the
/// string given.
///
/// It is recommended to use this method if all you need to do is test
/// a match, since the underlying matching engine may be able to do less
@ -187,7 +188,7 @@ impl Regex {
/// Unicode word characters:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
@ -210,7 +211,7 @@ impl Regex {
/// Unicode word characters:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap();
@ -232,7 +233,7 @@ impl Regex {
/// word characters:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let text = "Retroactively relinquishing remunerations is reprehensible.";
/// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
@ -260,7 +261,7 @@ impl Regex {
/// year separately.
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
@ -282,7 +283,7 @@ impl Regex {
/// We can make this example a bit clearer by using *named* capture groups:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
@ -326,7 +327,7 @@ impl Regex {
/// some text, where the movie is formatted like "'Title' (xxxx)":
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
@ -359,7 +360,7 @@ impl Regex {
/// To split a string delimited by arbitrary amounts of spaces or tabs:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"[ \t]+").unwrap();
/// let fields: Vec<&str> = re.split("a b \t c\td e").collect();
@ -383,7 +384,7 @@ impl Regex {
/// Get the first two words in some text:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"\W+").unwrap();
/// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect();
@ -430,7 +431,7 @@ impl Regex {
/// In typical usage, this can just be a normal string:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let re = Regex::new("[^01]+").unwrap();
/// assert_eq!(re.replace("1078910", ""), "1010");
@ -443,7 +444,7 @@ impl Regex {
/// capturing group matches easily:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # use regex::Captures; fn main() {
/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
@ -459,7 +460,7 @@ impl Regex {
/// with named capture groups:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", "$first $last");
@ -476,7 +477,7 @@ impl Regex {
/// underscore:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
/// let result = re.replace("deep fried", "${first}_$second");
@ -493,7 +494,7 @@ impl Regex {
/// byte string with `NoExpand`:
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// use regex::NoExpand;
///
@ -603,7 +604,7 @@ impl Regex {
/// `a`.
///
/// ```rust
/// # extern crate regex; use regex::Regex;
/// # use regex::Regex;
/// # fn main() {
/// let text = "aaaaa";
/// let pos = Regex::new(r"a+").unwrap().shortest_match(text);
@ -715,7 +716,7 @@ impl Regex {
}
/// Returns an iterator over the capture names.
pub fn capture_names(&self) -> CaptureNames {
pub fn capture_names(&self) -> CaptureNames<'_> {
CaptureNames(self.0.capture_names().iter())
}
@ -746,6 +747,7 @@ impl Regex {
/// whole matched region) is always unnamed.
///
/// `'r` is the lifetime of the compiled regular expression.
#[derive(Clone, Debug)]
pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);
impl<'r> Iterator for CaptureNames<'r> {
@ -761,12 +763,21 @@ impl<'r> Iterator for CaptureNames<'r> {
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
fn count(self) -> usize {
self.0.count()
}
}
impl<'r> ExactSizeIterator for CaptureNames<'r> {}
impl<'r> FusedIterator for CaptureNames<'r> {}
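With the `Clone`/`Debug` derive and the `ExactSizeIterator`/`FusedIterator` impls above, `capture_names()` now behaves like a fully featured iterator. A small sketch against the public API; the pattern is illustrative:

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)").unwrap();
    let names: Vec<Option<&str>> = re.capture_names().collect();
    // Group 0 (the whole match) is always unnamed, so the first entry is None.
    assert_eq!(names, vec![None, Some("title"), Some("year")]);
    // The exact length is reported, so `count` (and `len`) are cheap.
    assert_eq!(re.capture_names().count(), 3);
}
```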
/// Yields all substrings delimited by a regular expression match.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the string being split.
#[derive(Debug)]
pub struct Split<'r, 't> {
finder: Matches<'r, 't>,
last: usize,
@ -796,12 +807,15 @@ impl<'r, 't> Iterator for Split<'r, 't> {
}
}
impl<'r, 't> FusedIterator for Split<'r, 't> {}
/// Yields at most `N` substrings delimited by a regular expression match.
///
/// The last substring will be whatever remains after splitting.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the string being split.
#[derive(Debug)]
pub struct SplitN<'r, 't> {
splits: Split<'r, 't>,
n: usize,
@ -829,8 +843,14 @@ impl<'r, 't> Iterator for SplitN<'r, 't> {
Some(&text[self.splits.last..])
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.n))
}
}
impl<'r, 't> FusedIterator for SplitN<'r, 't> {}
/// CaptureLocations is a low level representation of the raw offsets of each
/// submatch.
///
@ -947,17 +967,22 @@ impl<'t> Captures<'t> {
/// Expands all instances of `$name` in `replacement` to the corresponding
/// capture group `name`, and writes them to the `dst` buffer given.
///
/// `name` may be an integer corresponding to the index of the
/// capture group (counted by order of opening parenthesis where `0` is the
/// `name` may be an integer corresponding to the index of the capture
/// group (counted by order of opening parenthesis where `0` is the
/// entire match) or it can be a name (consisting of letters, digits or
/// underscores) corresponding to a named capture group.
///
/// If `name` isn't a valid capture group (whether the name doesn't exist
/// or isn't a valid index), then it is replaced with the empty string.
///
/// The longest possible name is used. e.g., `$1a` looks up the capture
/// group named `1a` and not the capture group at index `1`. To exert more
/// precise control over the name, use braces, e.g., `${1}a`.
/// The longest possible name consisting of the characters `[_0-9A-Za-z]`
/// is used. e.g., `$1a` looks up the capture group named `1a` and not the
/// capture group at index `1`. To exert more precise control over the
/// name, or to refer to a capture group name that uses characters outside
/// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
/// using braces, any sequence of characters is permitted. If the sequence
/// does not refer to a capture group name in the corresponding regex, then
/// it is replaced with an empty string.
///
/// To write a literal `$` use `$$`.
pub fn expand(&self, replacement: &str, dst: &mut String) {
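The expanded documentation above is easiest to see with a short example against `Captures::expand`; the pattern and replacement strings are illustrative:

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
    let caps = re.captures("Springsteen, Bruce").unwrap();

    // `$name` uses the longest possible run of [_0-9A-Za-z] as the name.
    let mut dst = String::new();
    caps.expand("$first $last", &mut dst);
    assert_eq!(dst, "Bruce Springsteen");

    // Braces make the group-name boundary explicit; `$firstx` would instead
    // look up a nonexistent group `firstx` and expand to the empty string.
    dst.clear();
    caps.expand("${first}x ${last}", &mut dst);
    assert_eq!(dst, "Brucex Springsteen");

    // A literal dollar sign is written `$$`.
    dst.clear();
    caps.expand("$$ $first", &mut dst);
    assert_eq!(dst, "$ Bruce");
}
```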
@ -975,15 +1000,15 @@ impl<'t> Captures<'t> {
}
impl<'t> fmt::Debug for Captures<'t> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_tuple("Captures").field(&CapturesDebug(self)).finish()
}
}
struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);
struct CapturesDebug<'c, 't>(&'c Captures<'t>);
impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// We'd like to show something nice here, even if it means an
// allocation to build a reverse index.
let slot_to_name: HashMap<&usize, &String> =
@ -1053,7 +1078,8 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
///
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
pub struct SubCaptureMatches<'c, 't: 'c> {
#[derive(Clone, Debug)]
pub struct SubCaptureMatches<'c, 't> {
caps: &'c Captures<'t>,
it: SubCapturesPosIter<'c>,
}
@ -1068,6 +1094,8 @@ impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
}
}
impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {}
/// An iterator that yields all non-overlapping capture groups matching a
/// particular regular expression.
///
@ -1075,6 +1103,7 @@ impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched string.
#[derive(Debug)]
pub struct CaptureMatches<'r, 't>(
re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>,
);
@ -1091,6 +1120,8 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
}
}
impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {}
/// An iterator over all non-overlapping matches for a particular string.
///
/// The iterator yields a `Match` value. The iterator stops when no more
@ -1098,6 +1129,7 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched string.
#[derive(Debug)]
pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSyncStr<'r>>);
impl<'r, 't> Iterator for Matches<'r, 't> {
@ -1109,12 +1141,14 @@ impl<'r, 't> Iterator for Matches<'r, 't> {
}
}
impl<'r, 't> FusedIterator for Matches<'r, 't> {}
/// Replacer describes types that can be used to replace matches in a string.
///
/// In general, users of this crate shouldn't need to implement this trait,
/// since implementations are already provided for `&str` and
/// `FnMut(&Captures) -> String` (or any `FnMut(&Captures) -> T`
/// where `T: AsRef<str>`), which covers most use cases.
/// since implementations are already provided for `&str` along with other
/// variants of string types and `FnMut(&Captures) -> String` (or any
/// `FnMut(&Captures) -> T` where `T: AsRef<str>`), which covers most use cases.
pub trait Replacer {
/// Appends text to `dst` to replace the current match.
///
@ -1122,8 +1156,8 @@ pub trait Replacer {
/// have a match at capture group `0`.
///
/// For example, a no-op replacement would be
/// `dst.extend(caps.get(0).unwrap().as_str())`.
fn replace_append(&mut self, caps: &Captures, dst: &mut String);
/// `dst.push_str(caps.get(0).unwrap().as_str())`.
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String);
/// Return a fixed unchanging replacement string.
///
@ -1166,36 +1200,81 @@ pub trait Replacer {
///
/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
#[derive(Debug)]
pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R);
impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
self.0.replace_append(caps, dst)
}
fn no_expansion(&mut self) -> Option<Cow<str>> {
fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
self.0.no_expansion()
}
}
impl<'a> Replacer for &'a str {
fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
caps.expand(*self, dst);
}
fn no_expansion(&mut self) -> Option<Cow<str>> {
match find_byte(b'$', self.as_bytes()) {
Some(_) => None,
None => Some(Cow::Borrowed(*self)),
}
fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
no_expansion(self)
}
}
impl<'a> Replacer for &'a String {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
self.as_str().replace_append(caps, dst)
}
fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
no_expansion(self)
}
}
impl Replacer for String {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
self.as_str().replace_append(caps, dst)
}
fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
no_expansion(self)
}
}
impl<'a> Replacer for Cow<'a, str> {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
self.as_ref().replace_append(caps, dst)
}
fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
no_expansion(self)
}
}
impl<'a> Replacer for &'a Cow<'a, str> {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
self.as_ref().replace_append(caps, dst)
}
fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
no_expansion(self)
}
}
fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> {
let s = t.as_ref();
match find_byte(b'$', s.as_bytes()) {
Some(_) => None,
None => Some(Cow::Borrowed(s)),
}
}
impl<F, T> Replacer for F
where
F: FnMut(&Captures) -> T,
F: FnMut(&Captures<'_>) -> T,
T: AsRef<str>,
{
fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
dst.push_str((*self)(caps).as_ref());
}
}
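Taken together, the new `Replacer` impls above mean `replace`/`replace_all` now accept `&str`, `String`, `Cow<str>`, references to those, and closures. A short sketch, assuming only the public API shown in this file; the pattern is illustrative:

```rust
use std::borrow::Cow;
use regex::{Captures, Regex};

fn main() {
    let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();

    // Any of the string-like replacers goes through the `$name` expansion path.
    assert_eq!(re.replace("Springsteen, Bruce", "$first $last"), "Bruce Springsteen");
    assert_eq!(re.replace("Springsteen, Bruce", String::from("$first $last")), "Bruce Springsteen");
    assert_eq!(re.replace("Springsteen, Bruce", Cow::Borrowed("$first $last")), "Bruce Springsteen");

    // Closures get the `Captures` for each match and build the replacement directly.
    let swapped = re.replace("Springsteen, Bruce", |caps: &Captures<'_>| {
        format!("{} {}", &caps["first"], &caps["last"])
    });
    assert_eq!(swapped, "Bruce Springsteen");
}
```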
@ -1208,14 +1287,15 @@ where
/// and performant (since capture groups don't need to be found).
///
/// `'t` is the lifetime of the literal text.
#[derive(Clone, Debug)]
pub struct NoExpand<'t>(pub &'t str);
impl<'t> Replacer for NoExpand<'t> {
fn replace_append(&mut self, _: &Captures, dst: &mut String) {
fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) {
dst.push_str(self.0);
}
fn no_expansion(&mut self) -> Option<Cow<str>> {
fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
Some(Cow::Borrowed(self.0))
}
}

11
third_party/rust/regex/src/sparse.rs vendored

@ -1,3 +1,4 @@
use std::fmt;
use std::ops::Deref;
use std::slice;
@ -7,11 +8,11 @@ use std::slice;
/// entire set can also be done in constant time. Iteration yields elements
/// in the order in which they were inserted.
///
/// The data structure is based on: http://research.swtch.com/sparse
/// The data structure is based on: https://research.swtch.com/sparse
/// Note though that we don't actually use uninitialized memory. We generally
/// reuse allocations, so the initial allocation cost is bearable. However,
/// its other properties listed above are extremely useful.
#[derive(Clone, Debug)]
#[derive(Clone)]
pub struct SparseSet {
/// Dense contains the instruction pointers in the order in which they
/// were inserted.
@ -60,6 +61,12 @@ impl SparseSet {
}
}
impl fmt::Debug for SparseSet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "SparseSet({:?})", self.dense)
}
}
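For readers unfamiliar with the linked paper, a minimal sketch of the sparse-set idea follows. `SparseSetSketch` is a hypothetical, simplified type, not the crate's `SparseSet` (which stores instruction pointers and reuses allocations), but it shows why insert, membership and clear are all constant time while iteration stays in insertion order:

```rust
// `dense` holds the members in insertion order; `sparse[v]` is a back-pointer
// into `dense`. Membership holds only when the back-pointer round-trips, so
// stale entries left behind by `clear` are harmless (no uninitialized memory).
struct SparseSetSketch {
    dense: Vec<usize>,
    sparse: Vec<usize>,
}

impl SparseSetSketch {
    fn new(capacity: usize) -> SparseSetSketch {
        SparseSetSketch { dense: Vec::with_capacity(capacity), sparse: vec![0; capacity] }
    }

    fn contains(&self, v: usize) -> bool {
        self.sparse[v] < self.dense.len() && self.dense[self.sparse[v]] == v
    }

    fn insert(&mut self, v: usize) {
        if !self.contains(v) {
            self.sparse[v] = self.dense.len();
            self.dense.push(v);
        }
    }

    fn clear(&mut self) {
        // Emptying `dense` invalidates every back-pointer at once.
        self.dense.clear();
    }
}

fn main() {
    let mut set = SparseSetSketch::new(8);
    set.insert(3);
    set.insert(5);
    assert!(set.contains(3) && set.contains(5) && !set.contains(4));
    assert_eq!(set.dense, vec![3, 5]); // iteration order == insertion order
    set.clear();
    assert!(!set.contains(3));
}
```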
impl Deref for SparseSet {
type Target = [usize];

2
third_party/rust/regex/test vendored

@ -1,5 +1,7 @@
#!/bin/bash
set -e
# This is a convenience script for running a broad swath of tests across
# features. We don't test the complete space, since the complete space is quite
# large. Hopefully once we migrate the test suite to better infrastructure

12
third_party/rust/regex/tests/api.rs vendored

@ -195,6 +195,18 @@ expand!(
);
expand!(expand10, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$bz$az", "");
expand!(expand_name1, r"%(?P<Z>[a-z]+)", "%abc", "$Z%", "abc%");
expand!(expand_name2, r"\[(?P<Z>[a-z]+)", "[abc", "$Z[", "abc[");
expand!(expand_name3, r"\{(?P<Z>[a-z]+)", "{abc", "$Z{", "abc{");
expand!(expand_name4, r"\}(?P<Z>[a-z]+)", "}abc", "$Z}", "abc}");
expand!(expand_name5, r"%([a-z]+)", "%abc", "$1a%", "%");
expand!(expand_name6, r"%([a-z]+)", "%abc", "${1}a%", "abca%");
expand!(expand_name7, r"\[(?P<Z[>[a-z]+)", "[abc", "${Z[}[", "abc[");
expand!(expand_name8, r"\[(?P<Z[>[a-z]+)", "[abc", "${foo}[", "[");
expand!(expand_name9, r"\[(?P<Z[>[a-z]+)", "[abc", "${1a}[", "[");
expand!(expand_name10, r"\[(?P<Z[>[a-z]+)", "[abc", "${#}[", "[");
expand!(expand_name11, r"\[(?P<Z[>[a-z]+)", "[abc", "${$$}[", "[");
split!(
split1,
r"(?-u)\s+",

5
third_party/rust/regex/tests/consistent.rs vendored

@ -157,10 +157,7 @@ macro_rules! checker {
}
impl quickcheck::Testable for RegexEqualityTest {
fn result<G: quickcheck::Gen>(
&self,
gen: &mut G,
) -> TestResult {
fn result(&self, gen: &mut quickcheck::Gen) -> TestResult {
let input = $mk_input(gen);
let input = &input;

17
third_party/rust/regex/tests/crazy.rs vendored

@ -118,6 +118,18 @@ matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3));
matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3));
matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2));
// Test that the DFA can handle pathological cases.
// (This should result in the DFA's cache being flushed too frequently, which
@ -125,9 +137,10 @@ matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
#[test]
fn dfa_handles_pathological_case() {
fn ones_and_zeroes(count: usize) -> String {
use rand::{thread_rng, Rng};
use rand::rngs::SmallRng;
use rand::{Rng, SeedableRng};
let mut rng = thread_rng();
let mut rng = SmallRng::from_entropy();
let mut s = String::new();
for _ in 0..count {
if rng.gen() {


@ -4,7 +4,6 @@ macro_rules! t { ($re:expr) => { text!($re) } }
macro_rules! match_text { ($text:expr) => { $text.as_bytes() } }
macro_rules! use_ { ($($path: tt)*) => { use regex::bytes::$($path)*; } }
macro_rules! empty_vec { () => { <Vec<&[u8]>>::new() } }
macro_rules! bytes { ($text:expr) => { $text } }
macro_rules! no_expand {

1
third_party/rust/regex/tests/macros_str.rs vendored

@ -4,6 +4,7 @@ macro_rules! t { ($text:expr) => { text!($text) } }
macro_rules! match_text { ($text:expr) => { $text.as_str() } }
macro_rules! use_ { ($($path: tt)*) => { use regex::$($path)*; } }
macro_rules! empty_vec { () => { <Vec<&str>>::new() } }
macro_rules! bytes { ($text:expr) => { std::str::from_utf8($text.as_ref()).unwrap() } }
macro_rules! no_expand {
($text:expr) => {{

9
third_party/rust/regex/tests/noparse.rs vendored

@ -26,6 +26,8 @@ noparse!(fail_bad_capture_name, "(?P<na-me>)");
noparse!(fail_bad_flag, "(?a)a");
noparse!(fail_too_big, "a{10000000}");
noparse!(fail_counted_no_close, "a{1001");
noparse!(fail_counted_decreasing, "a{2,1}");
noparse!(fail_counted_nonnegative, "a{-1,1}");
noparse!(fail_unfinished_cap, "(?");
noparse!(fail_unfinished_escape, "\\");
noparse!(fail_octal_digit, r"\8");
@ -41,10 +43,3 @@ noparse!(fail_range_end_no_class, "[a-[:lower:]]");
noparse!(fail_range_end_no_begin, r"[a-\A]");
noparse!(fail_range_end_no_end, r"[a-\z]");
noparse!(fail_range_end_no_boundary, r"[a-\b]");
noparse!(fail_empty_alt1, r"|z");
noparse!(fail_empty_alt2, r"z|");
noparse!(fail_empty_alt3, r"|");
noparse!(fail_empty_alt4, r"||");
noparse!(fail_empty_alt5, r"()|z");
noparse!(fail_empty_alt6, r"z|()");
noparse!(fail_empty_alt7, r"(|)");
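These `fail_empty_alt*` cases are removed because regex-syntax 0.6.25 now accepts empty alternation branches (see the new `match_empty12`..`match_empty23` cases added in crazy.rs above). A small sketch of the new behavior, mirroring the `match_empty13` expectation:

```rust
use regex::Regex;

fn main() {
    // `b|` used to be a parse error (see fail_empty_alt2 above); it now
    // compiles, and the empty right-hand branch can match the empty string.
    let re = Regex::new(r"b|").unwrap();
    let spans: Vec<(usize, usize)> =
        re.find_iter("abc").map(|m| (m.start(), m.end())).collect();
    assert_eq!(spans, vec![(0, 0), (1, 2), (3, 3)]);
}
```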

18
third_party/rust/regex/tests/regression.rs vendored

@ -199,3 +199,21 @@ fn regression_nfa_stops1() {
let re = ::regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap();
assert_eq!(0, re.find_iter(b"s\xE4").count());
}
// See: https://github.com/rust-lang/regex/issues/640
#[cfg(feature = "unicode-case")]
matiter!(
flags_are_unset,
r"((?i)foo)|Bar",
"foo Foo bar Bar",
(0, 3),
(4, 7),
(12, 15)
);
// See: https://github.com/rust-lang/regex/issues/659
//
// Note that 'Ј' is not 'j', but cyrillic Je
// https://en.wikipedia.org/wiki/Je_(Cyrillic)
ismatch!(empty_group_match, r"()Ј01", "zЈ01", true);
matiter!(empty_group_find, r"()Ј01", "zЈ01", (1, 5));

31
third_party/rust/regex/tests/regression_fuzz.rs vendored Normal file

@ -0,0 +1,31 @@
// These tests are only run for the "default" test target because some of them
// can take quite a long time. Some of them take long enough that it's not
// practical to run them in debug mode. :-/
// See: https://oss-fuzz.com/testcase-detail/5673225499181056
//
// Ignored by default since it takes too long in debug mode (almost a minute).
#[test]
#[ignore]
fn fuzz1() {
regex!(r"1}{55}{0}*{1}{55}{55}{5}*{1}{55}+{56}|;**");
}
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26505
// See: https://github.com/rust-lang/regex/issues/722
#[test]
fn empty_any_errors_no_panic() {
assert!(regex_new!(r"\P{any}").is_err());
}
// This tests that a very large regex errors during compilation instead of
// using gratuitous amounts of memory. The specific problem is that the
// compiler wasn't accounting for the memory used by Unicode character classes
// correctly.
//
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579
#[test]
fn big_regex_fails_to_compile() {
let pat = "[\u{0}\u{e}\u{2}\\w~~>[l\t\u{0}]p?<]{971158}";
assert!(regex_new!(pat).is_err());
}

102
third_party/rust/regex/tests/replace.rs vendored

@ -94,7 +94,7 @@ replace!(
replace,
r"([0-9]+)",
"age: 26",
|captures: &Captures| {
|captures: &Captures<'_>| {
match_text!(captures.get(1).unwrap())[0..1].to_owned()
},
"age: 2"
@ -104,7 +104,7 @@ replace!(
replace,
r"[0-9]+",
"age: 26",
|_captures: &Captures| t!("Z").to_owned(),
|_captures: &Captures<'_>| t!("Z").to_owned(),
"age: Z"
);
@ -130,3 +130,101 @@ replace!(
t!("${1}a $1a"),
"ba "
);
replace!(
impl_string,
replace,
r"[0-9]",
"age: 26",
t!("Z".to_string()),
"age: Z6"
);
replace!(
impl_string_ref,
replace,
r"[0-9]",
"age: 26",
t!(&"Z".to_string()),
"age: Z6"
);
replace!(
impl_cow_str_borrowed,
replace,
r"[0-9]",
"age: 26",
t!(std::borrow::Cow::<'_, str>::Borrowed("Z")),
"age: Z6"
);
replace!(
impl_cow_str_borrowed_ref,
replace,
r"[0-9]",
"age: 26",
t!(&std::borrow::Cow::<'_, str>::Borrowed("Z")),
"age: Z6"
);
replace!(
impl_cow_str_owned,
replace,
r"[0-9]",
"age: 26",
t!(std::borrow::Cow::<'_, str>::Owned("Z".to_string())),
"age: Z6"
);
replace!(
impl_cow_str_owned_ref,
replace,
r"[0-9]",
"age: 26",
t!(&std::borrow::Cow::<'_, str>::Owned("Z".to_string())),
"age: Z6"
);
replace!(
impl_vec_u8,
replace,
r"[0-9]",
"age: 26",
bytes!(vec![b'Z']),
"age: Z6"
);
replace!(
impl_vec_u8_ref,
replace,
r"[0-9]",
"age: 26",
bytes!(&vec![b'Z']),
"age: Z6"
);
replace!(
impl_cow_slice_borrowed,
replace,
r"[0-9]",
"age: 26",
bytes!(std::borrow::Cow::<'_, [u8]>::Borrowed(&[b'Z'])),
"age: Z6"
);
replace!(
impl_cow_slice_borrowed_ref,
replace,
r"[0-9]",
"age: 26",
bytes!(&std::borrow::Cow::<'_, [u8]>::Borrowed(&[b'Z'])),
"age: Z6"
);
replace!(
impl_cow_slice_owned,
replace,
r"[0-9]",
"age: 26",
bytes!(std::borrow::Cow::<'_, [u8]>::Owned(vec![b'Z'])),
"age: Z6"
);
replace!(
impl_cow_slice_owned_ref,
replace,
r"[0-9]",
"age: 26",
bytes!(&std::borrow::Cow::<'_, [u8]>::Owned(vec![b'Z'])),
"age: Z6"
);

22
third_party/rust/regex/tests/set.rs vendored

@ -17,6 +17,17 @@ matset!(set16, &["a"], "a", 0);
matset!(set17, &[".*a"], "a", 0);
matset!(set18, &["a", "β"], "β", 1);
// regexes that match the empty string
matset!(setempty1, &["", "a"], "abc", 0, 1);
matset!(setempty2, &["", "b"], "abc", 0, 1);
matset!(setempty3, &["", "z"], "abc", 0);
matset!(setempty4, &["a", ""], "abc", 0, 1);
matset!(setempty5, &["b", ""], "abc", 0, 1);
matset!(setempty6, &["z", ""], "abc", 1);
matset!(setempty7, &["b", "(?:)"], "abc", 0, 1);
matset!(setempty8, &["(?:)", "b"], "abc", 0, 1);
matset!(setempty9, &["c(?:)", "b"], "abc", 0, 1);
nomatset!(nset1, &["a", "a"], "b");
nomatset!(nset2, &["^foo", "bar$"], "bar foo");
nomatset!(
@ -43,3 +54,14 @@ fn get_set_patterns() {
let set = regex_set!(&["a", "b"]);
assert_eq!(vec!["a", "b"], set.patterns());
}
#[test]
fn len_and_empty() {
let empty = regex_set!(&[""; 0]);
assert_eq!(empty.len(), 0);
assert!(empty.is_empty());
let not_empty = regex_set!(&["ab", "b"]);
assert_eq!(not_empty.len(), 2);
assert!(!not_empty.is_empty());
}


@ -1,8 +1,5 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
extern crate rand;
extern crate regex;
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;


@ -1,6 +1,3 @@
extern crate rand;
extern crate regex;
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;


@ -1,8 +1,5 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
extern crate rand;
extern crate regex;
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;


@ -1,6 +1,3 @@
extern crate quickcheck;
extern crate regex;
/*
* This test is a minimal version of <rofl_0> and <subdiff_0>
*

46
third_party/rust/regex/tests/test_default.rs vendored

@ -1,7 +1,6 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
extern crate rand;
extern crate regex;
use regex;
// Due to macro scoping rules, this definition only applies for the modules
// defined below. Effectively, it allows us to use the same tests for both
@ -49,6 +48,7 @@ mod misc;
mod multiline;
mod noparse;
mod regression;
mod regression_fuzz;
mod replace;
mod searcher;
mod set;
@ -82,26 +82,49 @@ fn allow_octal() {
#[test]
fn oibits() {
use regex::bytes;
use regex::{Regex, RegexBuilder};
use std::panic::UnwindSafe;
use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
use std::panic::{RefUnwindSafe, UnwindSafe};
fn assert_send<T: Send>() {}
fn assert_sync<T: Sync>() {}
fn assert_unwind_safe<T: UnwindSafe>() {}
fn assert_ref_unwind_safe<T: RefUnwindSafe>() {}
assert_send::<Regex>();
assert_sync::<Regex>();
assert_unwind_safe::<Regex>();
assert_ref_unwind_safe::<Regex>();
assert_send::<RegexBuilder>();
assert_sync::<RegexBuilder>();
assert_unwind_safe::<RegexBuilder>();
assert_ref_unwind_safe::<RegexBuilder>();
assert_send::<bytes::Regex>();
assert_sync::<bytes::Regex>();
assert_unwind_safe::<bytes::Regex>();
assert_ref_unwind_safe::<bytes::Regex>();
assert_send::<bytes::RegexBuilder>();
assert_sync::<bytes::RegexBuilder>();
assert_unwind_safe::<bytes::RegexBuilder>();
assert_ref_unwind_safe::<bytes::RegexBuilder>();
assert_send::<RegexSet>();
assert_sync::<RegexSet>();
assert_unwind_safe::<RegexSet>();
assert_ref_unwind_safe::<RegexSet>();
assert_send::<RegexSetBuilder>();
assert_sync::<RegexSetBuilder>();
assert_unwind_safe::<RegexSetBuilder>();
assert_ref_unwind_safe::<RegexSetBuilder>();
assert_send::<bytes::RegexSet>();
assert_sync::<bytes::RegexSet>();
assert_unwind_safe::<bytes::RegexSet>();
assert_ref_unwind_safe::<bytes::RegexSet>();
assert_send::<bytes::RegexSetBuilder>();
assert_sync::<bytes::RegexSetBuilder>();
assert_unwind_safe::<bytes::RegexSetBuilder>();
assert_ref_unwind_safe::<bytes::RegexSetBuilder>();
}
// See: https://github.com/rust-lang/regex/issues/568
@ -112,3 +135,18 @@ fn oibits_regression() {
let _ = panic::catch_unwind(|| Regex::new("a").unwrap());
}
// See: https://github.com/rust-lang/regex/issues/750
#[test]
#[cfg(target_pointer_width = "64")]
fn regex_is_reasonably_small() {
use std::mem::size_of;
use regex::bytes;
use regex::{Regex, RegexSet};
assert_eq!(16, size_of::<Regex>());
assert_eq!(16, size_of::<RegexSet>());
assert_eq!(16, size_of::<bytes::Regex>());
assert_eq!(16, size_of::<bytes::RegexSet>());
}


@ -1,6 +1,3 @@
extern crate rand;
extern crate regex;
macro_rules! regex_new {
($re:expr) => {{
use regex::bytes::Regex;

3
third_party/rust/regex/tests/test_nfa.rs vendored

@ -1,8 +1,5 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
extern crate rand;
extern crate regex;
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;


@ -1,6 +1,3 @@
extern crate rand;
extern crate regex;
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;


@ -1,8 +1,5 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
extern crate rand;
extern crate regex;
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;

3
third_party/rust/regex/tests/unicode.rs vendored

@ -74,6 +74,9 @@ mat!(
Some((0, 3))
);
mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4)));
// See: https://github.com/rust-lang/regex/issues/719
mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4)));
mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4)));
mat!(
uni_class_gencat_initial_punctuation,
r"\p{Initial_Punctuation}",


@ -1 +0,0 @@
{"files":{"Cargo.toml":"a08d3007cec7ad1a83afad57980965ece5457089404f6f5d41eacc8143386d69","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"c9a75f18b9ab2927829a208fc6aa2cf4e63b8420887ba29cdb265d6619ae82d5","README.md":"ab6f09e96c06e37ee3df492562a07c1c3548dd5abf73301f8215a5dcedcccc84","benches/thread_local.rs":"cc8bde81ed6206525feff209598caf1e01e89a83bf21d8b7ccc0dadc8b89d815","src/cached.rs":"089286aa7bcde7c92b1ee7381b74f8c30049c0d80a85c1babdbac69b2e210396","src/lib.rs":"a67d7bf8c7c3bd869ea297cf1d158db8c9c4bbf7ae1e23d9028cfc3a7554e235","src/thread_id.rs":"0962c130061939557aa272115e4420fbbc63b6bd306783a456a8ffcbf304a447","src/unreachable.rs":"830d44988f86f4fc6c3c4dd7e9e4e7d0f2cb9c5b024c360b5f7ceae365983367"},"package":"d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"}

26
third_party/rust/thread_local/Cargo.toml vendored

@ -1,26 +0,0 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "thread_local"
version = "1.0.1"
authors = ["Amanieu d'Antras <amanieu@gmail.com>"]
description = "Per-object thread-local storage"
documentation = "https://amanieu.github.io/thread_local-rs/thread_local/index.html"
readme = "README.md"
keywords = ["thread_local", "concurrent", "thread"]
license = "Apache-2.0/MIT"
repository = "https://github.com/Amanieu/thread_local-rs"
[dependencies.lazy_static]
version = "1.0"
[badges.travis-ci]
repository = "Amanieu/thread_local-rs"

201
third_party/rust/thread_local/LICENSE-APACHE vendored

@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

25
third_party/rust/thread_local/LICENSE-MIT vendored

@ -1,25 +0,0 @@
Copyright (c) 2016 The Rust Project Developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

41
third_party/rust/thread_local/README.md vendored

@ -1,41 +0,0 @@
thread_local
============
[![Build Status](https://travis-ci.org/Amanieu/thread_local-rs.svg?branch=master)](https://travis-ci.org/Amanieu/thread_local-rs) [![Crates.io](https://img.shields.io/crates/v/thread_local.svg)](https://crates.io/crates/thread_local)
This library provides the `ThreadLocal` and `CachedThreadLocal` types which
allow a separate copy of an object to be used for each thread. This allows for
per-object thread-local storage, unlike the standard library's `thread_local!`
macro which only allows static thread-local storage.
[Documentation](https://amanieu.github.io/thread_local-rs/thread_local/index.html)
## Usage
Add this to your `Cargo.toml`:
```toml
[dependencies]
thread_local = "1.0"
```
and this to your crate root:
```rust
extern crate thread_local;
```
## License
Licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
additional terms or conditions.


@ -1,18 +0,0 @@
#![feature(test)]
extern crate thread_local;
extern crate test;
use thread_local::{ThreadLocal, CachedThreadLocal};
#[bench]
fn thread_local(b: &mut test::Bencher) {
let local = ThreadLocal::new();
b.iter(|| { let _: &i32 = local.get_or(|| Box::new(0)); });
}
#[bench]
fn cached_thread_local(b: &mut test::Bencher) {
let local = CachedThreadLocal::new();
b.iter(|| { let _: &i32 = local.get_or(|| Box::new(0)); });
}

198
third_party/rust/thread_local/src/cached.rs vendored

@ -1,198 +0,0 @@
use super::{IntoIter, IterMut, ThreadLocal};
use std::cell::UnsafeCell;
use std::fmt;
use std::panic::UnwindSafe;
use std::sync::atomic::{AtomicUsize, Ordering};
use thread_id;
use unreachable::{UncheckedOptionExt, UncheckedResultExt};
/// Wrapper around `ThreadLocal` which adds a fast path for a single thread.
///
/// This has the same API as `ThreadLocal`, but will register the first thread
/// that sets a value as its owner. All accesses by the owner will go through
/// a special fast path which is much faster than the normal `ThreadLocal` path.
pub struct CachedThreadLocal<T: Send> {
owner: AtomicUsize,
local: UnsafeCell<Option<Box<T>>>,
global: ThreadLocal<T>,
}
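Since the whole crate is being deleted, a minimal usage sketch of the API described above may help review; it mirrors the style of the crate's own doc examples (names and values are illustrative):

```rust
use thread_local::CachedThreadLocal;

fn main() {
    let tls: CachedThreadLocal<u32> = CachedThreadLocal::new();
    // The first thread to store a value becomes the owner and takes the fast
    // path on every later access; other threads fall back to the ThreadLocal table.
    assert_eq!(tls.get(), None);
    assert_eq!(tls.get_or(|| 5), &5);
    assert_eq!(tls.get(), Some(&5));
}
```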
// CachedThreadLocal is always Sync, even if T isn't
unsafe impl<T: Send> Sync for CachedThreadLocal<T> {}
impl<T: Send> Default for CachedThreadLocal<T> {
fn default() -> CachedThreadLocal<T> {
CachedThreadLocal::new()
}
}
impl<T: Send> CachedThreadLocal<T> {
/// Creates a new empty `CachedThreadLocal`.
pub fn new() -> CachedThreadLocal<T> {
CachedThreadLocal {
owner: AtomicUsize::new(0),
local: UnsafeCell::new(None),
global: ThreadLocal::new(),
}
}
/// Returns the element for the current thread, if it exists.
pub fn get(&self) -> Option<&T> {
let id = thread_id::get();
let owner = self.owner.load(Ordering::Relaxed);
if owner == id {
return unsafe { Some((*self.local.get()).as_ref().unchecked_unwrap()) };
}
if owner == 0 {
return None;
}
self.global.get_fast(id)
}
/// Returns the element for the current thread, or creates it if it doesn't
/// exist.
#[inline(always)]
pub fn get_or<F>(&self, create: F) -> &T
where
F: FnOnce() -> T,
{
unsafe {
self.get_or_try(|| Ok::<T, ()>(create()))
.unchecked_unwrap_ok()
}
}
/// Returns the element for the current thread, or creates it if it doesn't
/// exist. If `create` fails, that error is returned and no element is
/// added.
pub fn get_or_try<F, E>(&self, create: F) -> Result<&T, E>
where
F: FnOnce() -> Result<T, E>,
{
let id = thread_id::get();
let owner = self.owner.load(Ordering::Relaxed);
if owner == id {
return Ok(unsafe { (*self.local.get()).as_ref().unchecked_unwrap() });
}
self.get_or_try_slow(id, owner, create)
}
#[cold]
#[inline(never)]
fn get_or_try_slow<F, E>(&self, id: usize, owner: usize, create: F) -> Result<&T, E>
where
F: FnOnce() -> Result<T, E>,
{
if owner == 0 && self.owner.compare_and_swap(0, id, Ordering::Relaxed) == 0 {
unsafe {
(*self.local.get()) = Some(Box::new(create()?));
return Ok((*self.local.get()).as_ref().unchecked_unwrap());
}
}
match self.global.get_fast(id) {
Some(x) => Ok(x),
None => Ok(self.global.insert(id, Box::new(create()?), true)),
}
}
/// Returns a mutable iterator over the local values of all threads.
///
/// Since this call borrows the `ThreadLocal` mutably, this operation can
/// be done safely---the mutable borrow statically guarantees no other
/// threads are currently accessing their associated values.
pub fn iter_mut(&mut self) -> CachedIterMut<T> {
CachedIterMut {
local: unsafe { (*self.local.get()).as_mut().map(|x| &mut **x) },
global: self.global.iter_mut(),
}
}
/// Removes all thread-specific values from the `ThreadLocal`, effectively
/// resetting it to its original state.
///
/// Since this call borrows the `ThreadLocal` mutably, this operation can
/// be done safely---the mutable borrow statically guarantees no other
/// threads are currently accessing their associated values.
pub fn clear(&mut self) {
*self = CachedThreadLocal::new();
}
}
impl<T: Send> IntoIterator for CachedThreadLocal<T> {
type Item = T;
type IntoIter = CachedIntoIter<T>;
fn into_iter(self) -> CachedIntoIter<T> {
CachedIntoIter {
local: unsafe { (*self.local.get()).take().map(|x| *x) },
global: self.global.into_iter(),
}
}
}
impl<'a, T: Send + 'a> IntoIterator for &'a mut CachedThreadLocal<T> {
type Item = &'a mut T;
type IntoIter = CachedIterMut<'a, T>;
fn into_iter(self) -> CachedIterMut<'a, T> {
self.iter_mut()
}
}
impl<T: Send + Default> CachedThreadLocal<T> {
/// Returns the element for the current thread, or creates a default one if
/// it doesn't exist.
pub fn get_or_default(&self) -> &T {
self.get_or(T::default)
}
}
impl<T: Send + fmt::Debug> fmt::Debug for CachedThreadLocal<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ThreadLocal {{ local_data: {:?} }}", self.get())
}
}
impl<T: Send + UnwindSafe> UnwindSafe for CachedThreadLocal<T> {}
/// Mutable iterator over the contents of a `CachedThreadLocal`.
pub struct CachedIterMut<'a, T: Send + 'a> {
local: Option<&'a mut T>,
global: IterMut<'a, T>,
}
impl<'a, T: Send + 'a> Iterator for CachedIterMut<'a, T> {
type Item = &'a mut T;
fn next(&mut self) -> Option<&'a mut T> {
self.local.take().or_else(|| self.global.next())
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.global.size_hint().0 + self.local.is_some() as usize;
(len, Some(len))
}
}
impl<'a, T: Send + 'a> ExactSizeIterator for CachedIterMut<'a, T> {}
/// An iterator that moves out of a `CachedThreadLocal`.
pub struct CachedIntoIter<T: Send> {
local: Option<T>,
global: IntoIter<T>,
}
impl<T: Send> Iterator for CachedIntoIter<T> {
type Item = T;
fn next(&mut self) -> Option<T> {
self.local.take().or_else(|| self.global.next())
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.global.size_hint().0 + self.local.is_some() as usize;
(len, Some(len))
}
}
impl<T: Send> ExactSizeIterator for CachedIntoIter<T> {}

607
third_party/rust/thread_local/src/lib.rs vendored

@ -1,607 +0,0 @@
// Copyright 2017 Amanieu d'Antras
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
//! Per-object thread-local storage
//!
//! This library provides the `ThreadLocal` type which allows a separate copy of
//! an object to be used for each thread. This allows for per-object
//! thread-local storage, unlike the standard library's `thread_local!` macro
//! which only allows static thread-local storage.
//!
//! Per-thread objects are not destroyed when a thread exits. Instead, objects
//! are only destroyed when the `ThreadLocal` containing them is destroyed.
//!
//! You can also iterate over the thread-local values of all threads in a
//! `ThreadLocal` object using the `iter_mut` and `into_iter` methods. This can
//! only be done if you have mutable access to the `ThreadLocal` object, which
//! guarantees that you are the only thread currently accessing it.
//!
//! A `CachedThreadLocal` type is also provided which wraps a `ThreadLocal` but
//! also uses a special fast path for the first thread that writes into it. The
//! fast path has very low overhead (<1ns per access) while keeping the same
//! performance as `ThreadLocal` for other threads.
//!
//! Note that since thread IDs are recycled when a thread exits, it is possible
//! for one thread to retrieve the object of another thread. Since this can only
//! occur after a thread has exited, this does not lead to any race conditions.
//!
//! # Examples
//!
//! Basic usage of `ThreadLocal`:
//!
//! ```rust
//! use thread_local::ThreadLocal;
//! let tls: ThreadLocal<u32> = ThreadLocal::new();
//! assert_eq!(tls.get(), None);
//! assert_eq!(tls.get_or(|| 5), &5);
//! assert_eq!(tls.get(), Some(&5));
//! ```
//!
//! Combining thread-local values into a single result:
//!
//! ```rust
//! use thread_local::ThreadLocal;
//! use std::sync::Arc;
//! use std::cell::Cell;
//! use std::thread;
//!
//! let tls = Arc::new(ThreadLocal::new());
//!
//! // Create a bunch of threads to do stuff
//! for _ in 0..5 {
//! let tls2 = tls.clone();
//! thread::spawn(move || {
//! // Increment a counter to count some event...
//! let cell = tls2.get_or(|| Cell::new(0));
//! cell.set(cell.get() + 1);
//! }).join().unwrap();
//! }
//!
//! // Once all threads are done, collect the counter values and return the
//! // sum of all thread-local counter values.
//! let tls = Arc::try_unwrap(tls).unwrap();
//! let total = tls.into_iter().fold(0, |x, y| x + y.get());
//! assert_eq!(total, 5);
//! ```
#![warn(missing_docs)]
#[macro_use]
extern crate lazy_static;
mod thread_id;
mod unreachable;
mod cached;
pub use cached::{CachedIntoIter, CachedIterMut, CachedThreadLocal};
use std::cell::UnsafeCell;
use std::fmt;
use std::marker::PhantomData;
use std::panic::UnwindSafe;
use std::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
use std::sync::Mutex;
use unreachable::{UncheckedOptionExt, UncheckedResultExt};
/// Thread-local variable wrapper
///
/// See the [module-level documentation](index.html) for more.
pub struct ThreadLocal<T: Send> {
// Pointer to the current top-level hash table
table: AtomicPtr<Table<T>>,
// Lock used to guard against concurrent modifications. This is only taken
// while writing to the table, not when reading from it. This also guards
// the counter for the total number of values in the hash table.
lock: Mutex<usize>,
}
struct Table<T: Send> {
// Hash entries for the table
entries: Box<[TableEntry<T>]>,
// Number of bits used for the hash function
hash_bits: usize,
// Previous table, half the size of the current one
prev: Option<Box<Table<T>>>,
}
struct TableEntry<T: Send> {
// Current owner of this entry, or 0 if this is an empty entry
owner: AtomicUsize,
// The object associated with this entry. This is only ever accessed by the
// owner of the entry.
data: UnsafeCell<Option<Box<T>>>,
}
// ThreadLocal is always Sync, even if T isn't
unsafe impl<T: Send> Sync for ThreadLocal<T> {}
impl<T: Send> Default for ThreadLocal<T> {
fn default() -> ThreadLocal<T> {
ThreadLocal::new()
}
}
impl<T: Send> Drop for ThreadLocal<T> {
fn drop(&mut self) {
unsafe {
Box::from_raw(self.table.load(Ordering::Relaxed));
}
}
}
// Implementation of Clone for TableEntry, needed to make vec![] work
impl<T: Send> Clone for TableEntry<T> {
fn clone(&self) -> TableEntry<T> {
TableEntry {
owner: AtomicUsize::new(0),
data: UnsafeCell::new(None),
}
}
}
// Hash function for the thread id
#[cfg(target_pointer_width = "32")]
#[inline]
fn hash(id: usize, bits: usize) -> usize {
id.wrapping_mul(0x9E3779B9) >> (32 - bits)
}
#[cfg(target_pointer_width = "64")]
#[inline]
fn hash(id: usize, bits: usize) -> usize {
id.wrapping_mul(0x9E37_79B9_7F4A_7C15) >> (64 - bits)
}
impl<T: Send> ThreadLocal<T> {
/// Creates a new empty `ThreadLocal`.
pub fn new() -> ThreadLocal<T> {
let entry = TableEntry {
owner: AtomicUsize::new(0),
data: UnsafeCell::new(None),
};
let table = Table {
entries: vec![entry; 2].into_boxed_slice(),
hash_bits: 1,
prev: None,
};
ThreadLocal {
table: AtomicPtr::new(Box::into_raw(Box::new(table))),
lock: Mutex::new(0),
}
}
/// Returns the element for the current thread, if it exists.
pub fn get(&self) -> Option<&T> {
let id = thread_id::get();
self.get_fast(id)
}
/// Returns the element for the current thread, or creates it if it doesn't
/// exist.
pub fn get_or<F>(&self, create: F) -> &T
where
F: FnOnce() -> T,
{
unsafe {
self.get_or_try(|| Ok::<T, ()>(create()))
.unchecked_unwrap_ok()
}
}
/// Returns the element for the current thread, or creates it if it doesn't
/// exist. If `create` fails, that error is returned and no element is
/// added.
pub fn get_or_try<F, E>(&self, create: F) -> Result<&T, E>
where
F: FnOnce() -> Result<T, E>,
{
let id = thread_id::get();
match self.get_fast(id) {
Some(x) => Ok(x),
None => Ok(self.insert(id, Box::new(create()?), true)),
}
}
// Simple hash table lookup function
fn lookup(id: usize, table: &Table<T>) -> Option<&UnsafeCell<Option<Box<T>>>> {
// Because we use a Mutex to prevent concurrent modifications (but not
// reads) of the hash table, we can avoid any memory barriers here. No
// elements between our hash bucket and our value can have been modified
// since we inserted our thread-local value into the table.
for entry in table.entries.iter().cycle().skip(hash(id, table.hash_bits)) {
let owner = entry.owner.load(Ordering::Relaxed);
if owner == id {
return Some(&entry.data);
}
if owner == 0 {
return None;
}
}
unreachable!();
}
// Fast path: try to find our thread in the top-level hash table
fn get_fast(&self, id: usize) -> Option<&T> {
let table = unsafe { &*self.table.load(Ordering::Acquire) };
match Self::lookup(id, table) {
Some(x) => unsafe { Some((*x.get()).as_ref().unchecked_unwrap()) },
None => self.get_slow(id, table),
}
}
// Slow path: try to find our thread in the other hash tables, and then
// move it to the top-level hash table.
#[cold]
fn get_slow(&self, id: usize, table_top: &Table<T>) -> Option<&T> {
let mut current = &table_top.prev;
while let Some(ref table) = *current {
if let Some(x) = Self::lookup(id, table) {
let data = unsafe { (*x.get()).take().unchecked_unwrap() };
return Some(self.insert(id, data, false));
}
current = &table.prev;
}
None
}
#[cold]
fn insert(&self, id: usize, data: Box<T>, new: bool) -> &T {
// Lock the Mutex to ensure that only a single thread is modifying the
// hash table at a time.
let mut count = self.lock.lock().unwrap();
if new {
*count += 1;
}
let table_raw = self.table.load(Ordering::Relaxed);
let table = unsafe { &*table_raw };
// If the current top-level hash table is more than 75% full, add a new
// level with 2x the capacity. Elements will be moved up to the new
// top-level table as they are accessed; the doubling threshold is
// illustrated in the sketch just after this impl block.
let table = if *count > table.entries.len() * 3 / 4 {
let entry = TableEntry {
owner: AtomicUsize::new(0),
data: UnsafeCell::new(None),
};
let new_table = Box::into_raw(Box::new(Table {
entries: vec![entry; table.entries.len() * 2].into_boxed_slice(),
hash_bits: table.hash_bits + 1,
prev: unsafe { Some(Box::from_raw(table_raw)) },
}));
self.table.store(new_table, Ordering::Release);
unsafe { &*new_table }
} else {
table
};
// Insert the new element into the top-level hash table
for entry in table.entries.iter().cycle().skip(hash(id, table.hash_bits)) {
let owner = entry.owner.load(Ordering::Relaxed);
if owner == 0 {
unsafe {
entry.owner.store(id, Ordering::Relaxed);
*entry.data.get() = Some(data);
return (*entry.data.get()).as_ref().unchecked_unwrap();
}
}
if owner == id {
// This can happen if create() inserted a value into this
// ThreadLocal between our calls to get_fast() and insert(). We
// just return the existing value and drop the newly-allocated
// Box.
unsafe {
return (*entry.data.get()).as_ref().unchecked_unwrap();
}
}
}
unreachable!();
}
fn raw_iter(&mut self) -> RawIter<T> {
RawIter {
remaining: *self.lock.get_mut().unwrap(),
index: 0,
table: self.table.load(Ordering::Relaxed),
}
}
/// Returns a mutable iterator over the local values of all threads.
///
/// Since this call borrows the `ThreadLocal` mutably, this operation can
/// be done safely---the mutable borrow statically guarantees no other
/// threads are currently accessing their associated values.
pub fn iter_mut(&mut self) -> IterMut<T> {
IterMut {
raw: self.raw_iter(),
marker: PhantomData,
}
}
/// Removes all thread-specific values from the `ThreadLocal`, effectively
/// resetting it to its original state.
///
/// Since this call borrows the `ThreadLocal` mutably, this operation can
/// be done safely---the mutable borrow statically guarantees no other
/// threads are currently accessing their associated values.
pub fn clear(&mut self) {
*self = ThreadLocal::new();
}
}
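// Not in the original source: a small sketch of the growth rule used by
// insert() above. The table doubles once the number of stored values exceeds
// 3/4 of the current capacity, so capacities progress 2 -> 4 -> 8 -> ...
#[cfg(test)]
mod growth_demo {
    #[test]
    fn capacity_doubles_past_three_quarters_full() {
        let mut capacity = 2usize;
        for count in 1..=6usize {
            // Mirrors the `*count > table.entries.len() * 3 / 4` check.
            if count > capacity * 3 / 4 {
                capacity *= 2;
            }
        }
        // Six threads each storing a value leave the top-level table at 8 buckets.
        assert_eq!(capacity, 8);
    }
}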
impl<T: Send> IntoIterator for ThreadLocal<T> {
type Item = T;
type IntoIter = IntoIter<T>;
fn into_iter(mut self) -> IntoIter<T> {
IntoIter {
raw: self.raw_iter(),
_thread_local: self,
}
}
}
impl<'a, T: Send + 'a> IntoIterator for &'a mut ThreadLocal<T> {
type Item = &'a mut T;
type IntoIter = IterMut<'a, T>;
fn into_iter(self) -> IterMut<'a, T> {
self.iter_mut()
}
}
impl<T: Send + Default> ThreadLocal<T> {
/// Returns the element for the current thread, or creates a default one if
/// it doesn't exist.
pub fn get_or_default(&self) -> &T {
self.get_or(Default::default)
}
}
impl<T: Send + fmt::Debug> fmt::Debug for ThreadLocal<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ThreadLocal {{ local_data: {:?} }}", self.get())
}
}
impl<T: Send + UnwindSafe> UnwindSafe for ThreadLocal<T> {}
struct RawIter<T: Send> {
remaining: usize,
index: usize,
table: *const Table<T>,
}
impl<T: Send> Iterator for RawIter<T> {
type Item = *mut Option<Box<T>>;
fn next(&mut self) -> Option<*mut Option<Box<T>>> {
if self.remaining == 0 {
return None;
}
loop {
let entries = unsafe { &(*self.table).entries[..] };
while self.index < entries.len() {
let val = entries[self.index].data.get();
self.index += 1;
if unsafe { (*val).is_some() } {
self.remaining -= 1;
return Some(val);
}
}
self.index = 0;
self.table = unsafe { &**(*self.table).prev.as_ref().unchecked_unwrap() };
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
(self.remaining, Some(self.remaining))
}
}
/// Mutable iterator over the contents of a `ThreadLocal`.
pub struct IterMut<'a, T: Send + 'a> {
raw: RawIter<T>,
marker: PhantomData<&'a mut ThreadLocal<T>>,
}
impl<'a, T: Send + 'a> Iterator for IterMut<'a, T> {
type Item = &'a mut T;
fn next(&mut self) -> Option<&'a mut T> {
self.raw
.next()
.map(|x| unsafe { &mut **(*x).as_mut().unchecked_unwrap() })
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.raw.size_hint()
}
}
impl<'a, T: Send + 'a> ExactSizeIterator for IterMut<'a, T> {}
/// An iterator that moves out of a `ThreadLocal`.
pub struct IntoIter<T: Send> {
raw: RawIter<T>,
_thread_local: ThreadLocal<T>,
}
impl<T: Send> Iterator for IntoIter<T> {
type Item = T;
fn next(&mut self) -> Option<T> {
self.raw
.next()
.map(|x| unsafe { *(*x).take().unchecked_unwrap() })
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.raw.size_hint()
}
}
impl<T: Send> ExactSizeIterator for IntoIter<T> {}
#[cfg(test)]
mod tests {
use super::{CachedThreadLocal, ThreadLocal};
use std::cell::RefCell;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering::Relaxed;
use std::sync::Arc;
use std::thread;
fn make_create() -> Arc<dyn Fn() -> usize + Send + Sync> {
let count = AtomicUsize::new(0);
Arc::new(move || count.fetch_add(1, Relaxed))
}
#[test]
fn same_thread() {
let create = make_create();
let mut tls = ThreadLocal::new();
assert_eq!(None, tls.get());
assert_eq!("ThreadLocal { local_data: None }", format!("{:?}", &tls));
assert_eq!(0, *tls.get_or(|| create()));
assert_eq!(Some(&0), tls.get());
assert_eq!(0, *tls.get_or(|| create()));
assert_eq!(Some(&0), tls.get());
assert_eq!(0, *tls.get_or(|| create()));
assert_eq!(Some(&0), tls.get());
assert_eq!("ThreadLocal { local_data: Some(0) }", format!("{:?}", &tls));
tls.clear();
assert_eq!(None, tls.get());
}
#[test]
fn same_thread_cached() {
let create = make_create();
let mut tls = CachedThreadLocal::new();
assert_eq!(None, tls.get());
assert_eq!("ThreadLocal { local_data: None }", format!("{:?}", &tls));
assert_eq!(0, *tls.get_or(|| create()));
assert_eq!(Some(&0), tls.get());
assert_eq!(0, *tls.get_or(|| create()));
assert_eq!(Some(&0), tls.get());
assert_eq!(0, *tls.get_or(|| create()));
assert_eq!(Some(&0), tls.get());
assert_eq!("ThreadLocal { local_data: Some(0) }", format!("{:?}", &tls));
tls.clear();
assert_eq!(None, tls.get());
}
#[test]
fn different_thread() {
let create = make_create();
let tls = Arc::new(ThreadLocal::new());
assert_eq!(None, tls.get());
assert_eq!(0, *tls.get_or(|| create()));
assert_eq!(Some(&0), tls.get());
let tls2 = tls.clone();
let create2 = create.clone();
thread::spawn(move || {
assert_eq!(None, tls2.get());
assert_eq!(1, *tls2.get_or(|| create2()));
assert_eq!(Some(&1), tls2.get());
})
.join()
.unwrap();
assert_eq!(Some(&0), tls.get());
assert_eq!(0, *tls.get_or(|| create()));
}
#[test]
fn different_thread_cached() {
let create = make_create();
let tls = Arc::new(CachedThreadLocal::new());
assert_eq!(None, tls.get());
assert_eq!(0, *tls.get_or(|| create()));
assert_eq!(Some(&0), tls.get());
let tls2 = tls.clone();
let create2 = create.clone();
thread::spawn(move || {
assert_eq!(None, tls2.get());
assert_eq!(1, *tls2.get_or(|| create2()));
assert_eq!(Some(&1), tls2.get());
})
.join()
.unwrap();
assert_eq!(Some(&0), tls.get());
assert_eq!(0, *tls.get_or(|| create()));
}
#[test]
fn iter() {
let tls = Arc::new(ThreadLocal::new());
tls.get_or(|| Box::new(1));
let tls2 = tls.clone();
thread::spawn(move || {
tls2.get_or(|| Box::new(2));
let tls3 = tls2.clone();
thread::spawn(move || {
tls3.get_or(|| Box::new(3));
})
.join()
.unwrap();
})
.join()
.unwrap();
let mut tls = Arc::try_unwrap(tls).unwrap();
let mut v = tls.iter_mut().map(|x| **x).collect::<Vec<i32>>();
v.sort();
assert_eq!(vec![1, 2, 3], v);
let mut v = tls.into_iter().map(|x| *x).collect::<Vec<i32>>();
v.sort();
assert_eq!(vec![1, 2, 3], v);
}
#[test]
fn iter_cached() {
let tls = Arc::new(CachedThreadLocal::new());
tls.get_or(|| Box::new(1));
let tls2 = tls.clone();
thread::spawn(move || {
tls2.get_or(|| Box::new(2));
let tls3 = tls2.clone();
thread::spawn(move || {
tls3.get_or(|| Box::new(3));
})
.join()
.unwrap();
})
.join()
.unwrap();
let mut tls = Arc::try_unwrap(tls).unwrap();
let mut v = tls.iter_mut().map(|x| **x).collect::<Vec<i32>>();
v.sort();
assert_eq!(vec![1, 2, 3], v);
let mut v = tls.into_iter().map(|x| *x).collect::<Vec<i32>>();
v.sort();
assert_eq!(vec![1, 2, 3], v);
}
#[test]
fn is_sync() {
fn foo<T: Sync>() {}
foo::<ThreadLocal<String>>();
foo::<ThreadLocal<RefCell<String>>>();
foo::<CachedThreadLocal<String>>();
foo::<CachedThreadLocal<RefCell<String>>>();
}
}
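For orientation, a minimal consumer-side sketch of the API deleted above (hypothetical caller code, not part of the vendored sources; it assumes the crate is pulled in under its published name `thread_local`):

use std::cell::Cell;
use thread_local::ThreadLocal;

fn main() {
    // One lazily-created counter per thread that touches `counter`.
    let counter: ThreadLocal<Cell<u64>> = ThreadLocal::new();
    let slot = counter.get_or(|| Cell::new(0));
    slot.set(slot.get() + 1);
    // get() only ever observes the calling thread's own value.
    assert_eq!(counter.get().map(Cell::get), Some(1));
}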


@@ -1,61 +0,0 @@
// Copyright 2017 Amanieu d'Antras
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
use std::collections::BinaryHeap;
use std::sync::Mutex;
use std::usize;
// Thread ID manager which allocates thread IDs. It attempts to aggressively
// reuse thread IDs where possible to avoid cases where a ThreadLocal grows
// indefinitely when it is used by many short-lived threads.
struct ThreadIdManager {
limit: usize,
free_list: BinaryHeap<usize>,
}
impl ThreadIdManager {
fn new() -> ThreadIdManager {
ThreadIdManager {
limit: usize::MAX,
free_list: BinaryHeap::new(),
}
}
fn alloc(&mut self) -> usize {
if let Some(id) = self.free_list.pop() {
id
} else {
let id = self.limit;
self.limit = self.limit.checked_sub(1).expect("Ran out of thread IDs");
id
}
}
fn free(&mut self, id: usize) {
self.free_list.push(id);
}
}
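// Not in the original file: a minimal sketch of the reuse behaviour the
// comment above describes. Freed ids go onto the free list and are handed
// out again before any brand-new id is minted.
#[cfg(test)]
mod id_reuse_demo {
    use super::ThreadIdManager;
    #[test]
    fn freed_ids_come_back_first() {
        let mut mgr = ThreadIdManager::new();
        let first = mgr.alloc();
        let second = mgr.alloc();
        assert_ne!(first, second);
        mgr.free(first);
        // The free list is consulted before `limit` is decremented again.
        assert_eq!(mgr.alloc(), first);
    }
}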
lazy_static! {
static ref THREAD_ID_MANAGER: Mutex<ThreadIdManager> = Mutex::new(ThreadIdManager::new());
}
// Non-zero integer which is unique to the current thread while it is running.
// A thread ID may be reused after a thread exits.
struct ThreadId(usize);
impl ThreadId {
fn new() -> ThreadId {
ThreadId(THREAD_ID_MANAGER.lock().unwrap().alloc())
}
}
impl Drop for ThreadId {
fn drop(&mut self) {
THREAD_ID_MANAGER.lock().unwrap().free(self.0);
}
}
thread_local!(static THREAD_ID: ThreadId = ThreadId::new());
/// Returns a non-zero ID for the current thread
pub fn get() -> usize {
THREAD_ID.with(|x| x.0)
}
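// Not in the original file: a minimal sketch of the guarantees get() gives.
// The id is allocated once per thread via the `thread_local!` slot above, so
// repeated calls on the same thread agree, and ids are always non-zero.
#[cfg(test)]
mod thread_id_demo {
    #[test]
    fn id_is_stable_and_non_zero() {
        assert_ne!(super::get(), 0);
        assert_eq!(super::get(), super::get());
        // A live sibling thread is assigned its own distinct id.
        let other = std::thread::spawn(super::get).join().unwrap();
        assert_ne!(other, 0);
        assert_ne!(other, super::get());
    }
}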


@@ -1,74 +0,0 @@
// Copyright 2017 Amanieu d'Antras
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
//! # unreachable
//! inlined from https://github.com/reem/rust-unreachable/
//!
//! An unreachable code optimization hint in stable rust, and some useful
//! extension traits for `Option` and `Result`.
//!
/// Hint to the optimizer that any code path which calls this function is
/// statically unreachable and can be removed.
///
/// Calling this function in reachable code invokes undefined behavior. Be
/// very, very sure this is what you want; often, a simple `panic!` is more
/// suitable.
#[inline]
pub unsafe fn unreachable() -> ! {
/// The empty type for cases which can't occur.
enum Void { }
let x: &Void = ::std::mem::transmute(1usize);
match *x {}
}
/// An extension trait for `Option<T>` providing unchecked unwrapping methods.
pub trait UncheckedOptionExt<T> {
/// Get the value out of this Option without checking for None.
unsafe fn unchecked_unwrap(self) -> T;
/// Assert that this Option is a None to the optimizer.
unsafe fn unchecked_unwrap_none(self);
}
/// An extension trait for `Result<T, E>` providing unchecked unwrapping methods.
pub trait UncheckedResultExt<T, E> {
/// Get the value out of this Result without checking for Err.
unsafe fn unchecked_unwrap_ok(self) -> T;
/// Get the error out of this Result without checking for Ok.
unsafe fn unchecked_unwrap_err(self) -> E;
}
impl<T> UncheckedOptionExt<T> for Option<T> {
unsafe fn unchecked_unwrap(self) -> T {
match self {
Some(x) => x,
None => unreachable()
}
}
unsafe fn unchecked_unwrap_none(self) {
if self.is_some() { unreachable() }
}
}
impl<T, E> UncheckedResultExt<T, E> for Result<T, E> {
unsafe fn unchecked_unwrap_ok(self) -> T {
match self {
Ok(x) => x,
Err(_) => unreachable()
}
}
unsafe fn unchecked_unwrap_err(self) -> E {
match self {
Ok(_) => unreachable(),
Err(e) => e
}
}
}
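// Not in the original file: a minimal sketch of how these helpers are meant
// to be used. The unchecked unwraps are only sound when the caller has already
// established that the value is present, as lib.rs does right after writing
// into an entry; here the values are constructed one line earlier.
#[cfg(test)]
mod unchecked_demo {
    use super::{UncheckedOptionExt, UncheckedResultExt};
    #[test]
    fn unwrap_values_known_to_be_present() {
        let slot: Option<u32> = Some(7);
        // Safe: `slot` was just built as `Some`, so the None branch is dead.
        assert_eq!(unsafe { slot.unchecked_unwrap() }, 7);
        let res: Result<u32, ()> = Ok(7);
        assert_eq!(unsafe { res.unchecked_unwrap_ok() }, 7);
    }
}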