Backed out 15 changesets (bug 1794001, bug 1693271, bug 1793995) for causing build bustages on RustRegex.h CLOSED TREE

Backed out changeset 93fa076646e1 (bug 1793995)
Backed out changeset 3deec78af656 (bug 1793995)
Backed out changeset 61d74f2cf5cf (bug 1793995)
Backed out changeset 8484584cc787 (bug 1793995)
Backed out changeset f7d78fffc836 (bug 1793995)
Backed out changeset 37ccdfa60f9a (bug 1793995)
Backed out changeset 06d7d4823419 (bug 1793995)
Backed out changeset 5cb4daddadb9 (bug 1793995)
Backed out changeset 8b71c15f24c7 (bug 1693271)
Backed out changeset 77c06db920fe (bug 1693271)
Backed out changeset e286fb05d64f (bug 1794001)
Backed out changeset ea8ba9ce54c8 (bug 1794001)
Backed out changeset 27715593ce6f (bug 1794001)
Backed out changeset d1030c02b560 (bug 1794001)
Backed out changeset 80b01e63ca54 (bug 1794001)
This commit is contained in:
Cristian Tuns 2022-10-12 12:57:05 -04:00
Родитель daceb61587
Коммит 9b9ee3e060
48 изменённых файлов: 718 добавлений и 17201 удалений

20
Cargo.lock сгенерированный
Просмотреть файл

@ -2164,8 +2164,8 @@ dependencies = [
"processtools",
"profiler_helper",
"qcms",
"regex-ffi",
"rsdparsa_capi",
"rure",
"rusqlite",
"rust_minidump_writer_linux",
"static_prefs",
@ -4399,6 +4399,14 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-ffi"
version = "0.1.0"
dependencies = [
"nsstring",
"regex",
]
[[package]]
name = "regex-syntax"
version = "0.6.27"
@ -4493,16 +4501,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d79b4b604167921892e84afbbaad9d5ad74e091bf6c511d9dbfb0593f09fabd"
[[package]]
name = "rure"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3de09595e75baee10da378a1fadfb50d04334a031d69dfb74d0cee3a94aa24c"
dependencies = [
"libc",
"regex",
]
[[package]]
name = "rusqlite"
version = "0.27.0"

Просмотреть файл

@ -513,20 +513,17 @@ nsresult ContentPrincipal::GetSiteIdentifier(SiteIdentifier& aSite) {
}
WebExtensionPolicy* ContentPrincipal::AddonPolicy() {
AssertIsOnMainThread();
if (!mAddon.isSome()) {
NS_ENSURE_TRUE(mURI, nullptr);
WebExtensionPolicy* policy =
mURI->SchemeIs("moz-extension") ? EPS().GetByURL(mURI.get()) : nullptr;
mAddon.emplace(policy ? policy->Core() : nullptr);
if (mURI->SchemeIs("moz-extension")) {
mAddon.emplace(EPS().GetByURL(mURI.get()));
} else {
mAddon.emplace(nullptr);
}
}
if (extensions::WebExtensionPolicyCore* policy = mAddon.ref()) {
return policy->GetMainThreadPolicy();
}
return nullptr;
return mAddon.value();
}
NS_IMETHODIMP

Просмотреть файл

@ -76,7 +76,7 @@ class ContentPrincipal final : public BasePrincipal {
private:
const nsCOMPtr<nsIURI> mURI;
nsCOMPtr<nsIURI> mDomain;
Maybe<RefPtr<extensions::WebExtensionPolicyCore>> mAddon;
Maybe<WeakPtr<extensions::WebExtensionPolicy>> mAddon;
};
} // namespace mozilla

Просмотреть файл

@ -11,16 +11,16 @@
[ChromeOnly, Exposed=Window]
interface MatchGlob {
[Throws]
constructor(UTF8String glob, optional boolean allowQuestion = true);
constructor(DOMString glob, optional boolean allowQuestion = true);
/**
* Returns true if the string matches the glob.
*/
boolean matches(UTF8String string);
boolean matches(DOMString string);
/**
* The glob string this MatchGlob represents.
*/
[Constant]
readonly attribute UTF8String glob;
readonly attribute DOMString glob;
};

Просмотреть файл

@ -7,7 +7,7 @@ interface URI;
interface WindowProxy;
typedef (MatchPatternSet or sequence<DOMString>) MatchPatternSetOrStringSequence;
typedef (MatchGlob or UTF8String) MatchGlobOrString;
typedef (MatchGlob or DOMString) MatchGlobOrString;
[ChromeOnly, Exposed=Window]
interface MozDocumentMatcher {
@ -74,6 +74,21 @@ interface MozDocumentMatcher {
[Constant]
readonly attribute MatchPatternSet? excludeMatches;
/**
* A set of glob matchers for URLs in which this script should run. If this
* list is present, the script will only run in URLs which match the
* `matches` pattern as well as one of these globs.
*/
[Cached, Constant, Frozen]
readonly attribute sequence<MatchGlob>? includeGlobs;
/**
* A set of glob matchers for URLs in which this script should not run, even
* if they match other include patterns or globs.
*/
[Cached, Constant, Frozen]
readonly attribute sequence<MatchGlob>? excludeGlobs;
/**
* The originAttributesPattern for which this script should be enabled for.
*/

Просмотреть файл

@ -176,14 +176,14 @@ interface WebExtensionPolicy {
* URL root is listed as a web accessible path. Access checks on a path, such
* as performed in nsScriptSecurityManager, use sourceMayAccessPath below.
*/
boolean isWebAccessiblePath(UTF8String pathname);
boolean isWebAccessiblePath(DOMString pathname);
/**
* Returns true if the given path relative to the extension's moz-extension:
* URL root may be accessed by web content at sourceURI. For Manifest V2,
* sourceURI is ignored and the path must merely be listed as web accessible.
*/
boolean sourceMayAccessPath(URI sourceURI, UTF8String pathname);
boolean sourceMayAccessPath(URI sourceURI, DOMString pathname);
/**
* Replaces localization placeholders in the given string with localized

Просмотреть файл

@ -15,7 +15,6 @@
#include "mozilla/dom/PContent.h"
#include "mozilla/dom/WindowGlobalChild.h"
#include "mozilla/extensions/MatchPattern.h"
#include "nsContentUtils.h"
#include "JSActorProtocolUtils.h"
@ -29,7 +28,7 @@ NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(JSWindowActorProtocol)
NS_INTERFACE_MAP_ENTRY(nsIDOMEventListener)
NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTION(JSWindowActorProtocol)
NS_IMPL_CYCLE_COLLECTION(JSWindowActorProtocol, mURIMatcher)
/* static */ already_AddRefed<JSWindowActorProtocol>
JSWindowActorProtocol::FromIPC(const JSWindowActorInfo& aInfo) {
@ -294,18 +293,30 @@ void JSWindowActorProtocol::RemoveObservers() {
}
}
extensions::MatchPatternSetCore* JSWindowActorProtocol::GetURIMatcher() {
extensions::MatchPatternSet* JSWindowActorProtocol::GetURIMatcher() {
// If we've already created the pattern set, return it.
if (mURIMatcher || mMatches.IsEmpty()) {
return mURIMatcher;
}
nsTArray<RefPtr<extensions::MatchPatternCore>> patterns(mMatches.Length());
for (const nsString& pattern : mMatches) {
patterns.AppendElement(new extensions::MatchPatternCore(
pattern, false, false, IgnoreErrors()));
// Constructing the MatchPatternSet requires a JS environment to be run in.
// We can construct it here in the JSM scope, as we will be keeping it around.
AutoJSAPI jsapi;
MOZ_ALWAYS_TRUE(jsapi.Init(xpc::PrivilegedJunkScope()));
GlobalObject global(jsapi.cx(), xpc::PrivilegedJunkScope());
nsTArray<OwningStringOrMatchPattern> patterns;
patterns.SetCapacity(mMatches.Length());
for (nsString& s : mMatches) {
auto entry = patterns.AppendElement();
entry->SetAsString() = s;
}
mURIMatcher = new extensions::MatchPatternSetCore(std::move(patterns));
MatchPatternOptions matchPatternOptions;
// Make MatchPattern's mSchemes create properly.
matchPatternOptions.mRestrictSchemes = false;
mURIMatcher = extensions::MatchPatternSet::Constructor(
global, patterns, matchPatternOptions, IgnoreErrors());
return mURIMatcher;
}
@ -365,7 +376,7 @@ bool JSWindowActorProtocol::Matches(BrowsingContext* aBrowsingContext,
return false;
}
if (extensions::MatchPatternSetCore* uriMatcher = GetURIMatcher()) {
if (extensions::MatchPatternSet* uriMatcher = GetURIMatcher()) {
if (!uriMatcher->Matches(aURI)) {
aRv.ThrowNotSupportedError(nsPrintfCString(
"Window protocol '%s' doesn't match uri %s", mName.get(),

Просмотреть файл

@ -77,7 +77,7 @@ class JSWindowActorProtocol final : public JSActorProtocol,
private:
explicit JSWindowActorProtocol(const nsACString& aName) : mName(aName) {}
extensions::MatchPatternSetCore* GetURIMatcher();
extensions::MatchPatternSet* GetURIMatcher();
bool RemoteTypePrefixMatches(const nsDependentCSubstring& aRemoteType);
bool MessageManagerGroupMatches(BrowsingContext* aBrowsingContext);
~JSWindowActorProtocol() = default;
@ -94,7 +94,7 @@ class JSWindowActorProtocol final : public JSActorProtocol,
ParentSide mParent;
ChildSide mChild;
RefPtr<extensions::MatchPatternSetCore> mURIMatcher;
RefPtr<extensions::MatchPatternSet> mURIMatcher;
};
} // namespace dom

Просмотреть файл

@ -459,7 +459,7 @@ FilenameTypeAndDetails nsContentSecurityUtils::FilenameToFilenameType(
sanitizedPathAndScheme.Append(u"can't get addon off main thread]"_ns);
}
AppendUTF8toUTF16(url.FilePath(), sanitizedPathAndScheme);
sanitizedPathAndScheme.Append(url.FilePath());
return FilenameTypeAndDetails(kExtensionURI, Some(sanitizedPathAndScheme));
}

Просмотреть файл

@ -941,33 +941,6 @@ who = "Mike Hommey <mh+mozilla@glandium.org>"
criteria = "safe-to-deploy"
delta = "0.7.0 -> 0.7.1"
[[audits.rure]]
who = "Nika Layzell <nika@thelayzells.com>"
criteria = "safe-to-deploy"
version = "0.2.2"
notes = """
This is a fairly straightforward FFI wrapper crate for `regex`, maintained by
the `regex` developers in the same repository.
This crate is explicitly designed for FFI use, and should not be used directly
by Rust code. The exported `extern \"C\"` functions are not marked as `unsafe`,
meaning that it is technically incorrect to use them from within Rust code,
however they are reasonable to use from C code.
The unsafe code in this crate heavily depends on the C caller maintaining
invariants, however these invariants are clearly documented in the `rure.h`
file, bundled with the crate.
I have checked the signatures of each function both in C++ and in the Rust to
ensure they match. In some places, the c `rure.h` header file is missing a
`const` qualifier which could be present given the Rust code, however this will
have no impact on ABI, and is fairly normal for FFI crates.
Panics are handled in all Rust FFI methods, meaning that projects which do not
disable unwinding will still consistently abort (using `libc::abort()`) if a
panic occurs in the Rust code.
"""
[[audits.rust_decimal]]
who = "Mike Hommey <mh+mozilla@glandium.org>"
criteria = "safe-to-deploy"

1
third_party/rust/rure/.cargo-checksum.json поставляемый
Просмотреть файл

@ -1 +0,0 @@
{"files":{"Cargo.toml":"6bed7b80456a66969f4fe9bb5341a0b927a7cd58e036441cbb3b79d67d86c24a","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"e8462c4064a376c2b2d729cc766064cc97decd6a2bb325cf9c7b50be9b8897ce","ctest/compile":"48b692b2aca8b61dfbe372f46d3aeb242893cfa2d81b0a89a73eb2f5db6b6e27","ctest/test.c":"6565808675763c42f8f10bd95445eaab4eaa3618efcf8ec215d98c3a1cfe756d","examples/compile":"471a781860b733f9aa9c1691f33ac8e8a4e85efcb97540942432ba5b58fbb982","examples/iter.c":"ad8312b2271ee19bfaf681d1d8338afaa89e4b180174f008b8cf951a6275776f","examples/sherlock.txt":"242ec73a70f0a03dcbe007e32038e7deeaee004aaec9a09a07fa322743440fa8","include/rure.h":"ddd6056d434d4efaf6ad30b8a38798d61ad385b0c9866988f9b2d4306dc1a99a","src/error.rs":"965c0207eb6d9cf644580d13b2d2d3bd310ab5c1ff65cb1fc04abdbd08ce7fe8","src/lib.rs":"9e99e774ee2a3db507d1e2cd7142b680411d90cf2b033c19ea9a7ea59ae4ba98","src/macros.rs":"ef2d468c1babe1b2252e62ad953b14ce58afb87768dc88612a70df27456038d2","src/rure.rs":"a889bbf35ab2d0018eac1122fe69abbbe2880fb8f5da211a1f60f703fddb5c82","test":"e8b91d4378b3ba09b7dfecdfa733765569778f57bc1c72cecc718e4ad63c1537"},"package":"f3de09595e75baee10da378a1fadfb50d04334a031d69dfb74d0cee3a94aa24c"}

38
third_party/rust/rure/Cargo.toml поставляемый
Просмотреть файл

@ -1,38 +0,0 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2018"
name = "rure"
version = "0.2.2"
authors = ["The Rust Project Developers"]
description = """
A C API for Rust's regular expression library.
"""
homepage = "https://github.com/rust-lang/regex"
documentation = "https://github.com/rust-lang/regex/tree/master/regex-capi"
readme = "README.md"
license = "MIT OR Apache-2.0"
repository = "https://github.com/rust-lang/regex"
[lib]
name = "rure"
crate-type = [
"staticlib",
"cdylib",
"rlib",
]
[dependencies.libc]
version = "0.2"
[dependencies.regex]
version = "1"

201
third_party/rust/rure/LICENSE-APACHE поставляемый
Просмотреть файл

@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

25
third_party/rust/rure/LICENSE-MIT поставляемый
Просмотреть файл

@ -1,25 +0,0 @@
Copyright (c) 2014 The Rust Project Developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

103
third_party/rust/rure/README.md поставляемый
Просмотреть файл

@ -1,103 +0,0 @@
C API for RUst's REgex engine
=============================
rure is a C API to Rust's regex library, which guarantees linear time
searching using finite automata. In exchange, it must give up some common
regex features such as backreferences and arbitrary lookaround. It does
however include capturing groups, lazy matching, Unicode support and word
boundary assertions. Its matching semantics generally correspond to Perl's,
or "leftmost first." Namely, the match locations reported correspond to the
first match that would be found by a backtracking engine.
The header file (`include/rure.h`) serves as the primary API documentation of
this library. Types and flags are documented first, and functions follow.
The syntax and possibly other useful things are documented in the Rust
API documentation: https://docs.rs/regex
Examples
--------
There are readable examples in the `ctest` and `examples` sub-directories.
Assuming you have
[Rust and Cargo installed](https://www.rust-lang.org/downloads.html)
(and a C compiler), then this should work to run the `iter` example:
```
$ git clone git://github.com/rust-lang/regex
$ cd regex/regex-capi/examples
$ ./compile
$ LD_LIBRARY_PATH=../target/release ./iter
```
Performance
-----------
It's fast. Its core matching engine is a lazy DFA, which is what GNU grep
and RE2 use. Like GNU grep, this regex engine can detect multi byte literals
in the regex and will use fast literal string searching to quickly skip
through the input to find possible match locations.
All memory usage is bounded and all searching takes linear time with respect
to the input string.
For more details, see the PERFORMANCE guide:
https://github.com/rust-lang/regex/blob/master/PERFORMANCE.md
Text encoding
-------------
All regular expressions must be valid UTF-8.
The text encoding of haystacks is more complicated. To a first
approximation, haystacks should be UTF-8. In fact, UTF-8 (and, one
supposes, ASCII) is the only well defined text encoding supported by this
library. It is impossible to match UTF-16, UTF-32 or any other encoding
without first transcoding it to UTF-8.
With that said, haystacks do not need to be valid UTF-8, and if they aren't
valid UTF-8, no performance penalty is paid. Whether invalid UTF-8 is
matched or not depends on the regular expression. For example, with the
`RURE_FLAG_UNICODE` flag enabled, the regex `.` is guaranteed to match a
single UTF-8 encoding of a Unicode codepoint (sans LF). In particular,
it will not match invalid UTF-8 such as `\xFF`, nor will it match surrogate
codepoints or "alternate" (i.e., non-minimal) encodings of codepoints.
However, with the `RURE_FLAG_UNICODE` flag disabled, the regex `.` will match
any *single* arbitrary byte (sans LF), including `\xFF`.
This provides a useful invariant: wherever `RURE_FLAG_UNICODE` is set, the
corresponding regex is guaranteed to match valid UTF-8. Invalid UTF-8 will
always prevent a match from happening when the flag is set. Since flags can be
toggled in the regular expression itself, this allows one to pick and choose
which parts of the regular expression must match UTF-8 or not.
Some good advice is to always enable the `RURE_FLAG_UNICODE` flag (which is
enabled when using `rure_compile_must`) and selectively disable the flag when
one wants to match arbitrary bytes. The flag can be disabled in a regular
expression with `(?-u)`.
Finally, if you want to match specific invalid UTF-8 bytes, you can use
escape sequences; e.g., `(?-u)\\xFF` will match `\xFF`. It's not
possible to use C literal escape sequences in this case since regular
expressions must be valid UTF-8.
Aborts
------
This library will abort your process if an unwinding panic is caught in the
Rust code. Generally, a panic occurs when there is a bug in the program or
if allocation failed. It is possible to cause this behavior by passing
invalid inputs to some functions. For example, giving an invalid capture
group index to `rure_captures_at` will cause Rust's bounds checks to fail,
which will cause a panic, which will be caught and printed to stderr. The
process will then `abort`.
Missing
-------
There are a few things missing from the C API that are present in the Rust API.
There's no particular (known) reason why they are missing; they just haven't
been implemented yet.
* Splitting a string by a regex.
* Replacing regex matches in a string with some other text.

8
third_party/rust/rure/ctest/compile поставляемый
Просмотреть файл

@ -1,8 +0,0 @@
#!/bin/sh
set -ex
cargo build --manifest-path ../Cargo.toml
gcc -DDEBUG -o test test.c -ansi -Wall -I../include -L../../target/debug -lrure
# If you're using librure.a, then you'll need to link other stuff:
# -lutil -ldl -lpthread -lgcc_s -lc -lm -lrt -lutil -lrure

591
third_party/rust/rure/ctest/test.c поставляемый
Просмотреть файл

@ -1,591 +0,0 @@
#include <assert.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "rure.h"
#ifndef DEBUG
#define DEBUG false
#endif
/*
 * Checks that rure_is_match reports a match when the pattern \p{So}$ is run
 * against a haystack ending in the bytes \xE2\x98\x83.
 *
 * Returns true when a match was reported, false otherwise. A diagnostic is
 * written to stderr on failure only when DEBUG is truthy.
 */
bool test_is_match() {
    const char *text = "snowman: \xE2\x98\x83";
    rure *regex = rure_compile_must("\\p{So}$");
    bool ok = rure_is_match(regex, (const uint8_t *)text, strlen(text), 0);

    /* The test outcome is exactly the match result; only logging is extra. */
    if (!ok && DEBUG) {
        fprintf(stderr,
                "[test_is_match] expected match, but got no match\n");
    }

    rure_free(regex);
    return ok;
}
/*
 * Checks rure_shortest_match with the pattern a+ on the haystack "aaaaa":
 * a match must be reported and the end offset written back must be 1.
 *
 * Returns true when both conditions hold, false otherwise. Diagnostics go to
 * stderr only when DEBUG is truthy.
 */
bool test_shortest_match() {
    const char *text = "aaaaa";
    const size_t want_end = 1;
    size_t got_end = 0;
    bool ok = true;
    rure *regex = rure_compile_must("a+");

    if (!rure_shortest_match(regex, (const uint8_t *)text, strlen(text), 0,
                             &got_end)) {
        ok = false;
        if (DEBUG) {
            fprintf(stderr,
                    "[test_shortest_match] expected match, "
                    "but got no match\n");
        }
    }

    /* The end offset is checked even if no match was reported above. */
    if (got_end != want_end) {
        ok = false;
        if (DEBUG) {
            fprintf(stderr,
                    "[test_shortest_match] expected match end location %zu "
                    "but got %zu\n", want_end, got_end);
        }
    }

    rure_free(regex);
    return ok;
}
/*
 * Checks rure_find with the pattern \p{So}$: a match must be reported and
 * the span written into the rure_match must be (9, 12).
 *
 * Returns true when both conditions hold, false otherwise. Diagnostics go to
 * stderr only when DEBUG is truthy.
 */
bool test_find() {
    const char *text = "snowman: \xE2\x98\x83";
    const size_t want_start = 9;
    const size_t want_end = 12;
    bool ok = true;
    rure *regex = rure_compile_must("\\p{So}$");
    rure_match span = {0};

    if (!rure_find(regex, (const uint8_t *)text, strlen(text), 0, &span)) {
        ok = false;
        if (DEBUG) {
            fprintf(stderr, "[test_find] expected match, but got no match\n");
        }
    }

    /* The span is compared even when no match was reported above. */
    if (!(span.start == want_start && span.end == want_end)) {
        ok = false;
        if (DEBUG) {
            fprintf(stderr,
                    "[test_find] expected match at (%zu, %zu), but "
                    "got match at (%zu, %zu)\n",
                    want_start, want_end, span.start, span.end);
        }
    }

    rure_free(regex);
    return ok;
}
/*
 * Exercises capture-group extraction. Compiles a pattern containing a group
 * named "snowman", runs rure_find_captures over the haystack, and then
 * verifies, in order:
 *   1. that a match was reported,
 *   2. that rure_captures_len returns 3,
 *   3. that rure_capture_name_index maps "snowman" to index 2,
 *   4. that the span stored for capture 2 is (9, 12).
 *
 * Returns true when every check passes, false otherwise. Diagnostics go to
 * stderr only when DEBUG is truthy. The captures object and the regex are
 * released on every path through the `done` label.
 */
bool test_captures() {
    bool passed = true;
    const char *haystack = "snowman: \xE2\x98\x83";
    rure *re = rure_compile_must(".(.*(?P<snowman>\\p{So}))$");
    rure_match match = {0};
    rure_captures *caps = rure_captures_new(re);
    bool matched = rure_find_captures(re, (const uint8_t *)haystack,
                                      strlen(haystack), 0, caps);
    if (!matched) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_captures] expected match, but got no match\n");
        }
        /* No early exit: the remaining checks still run and may also log. */
        passed = false;
    }
    size_t expect_captures_len = 3;
    size_t captures_len = rure_captures_len(caps);
    if (captures_len != expect_captures_len) {
        if (DEBUG) {
            /* Both arguments are size_t, so the conversion must be %zu. */
            fprintf(stderr,
                    "[test_captures] "
                    "expected capture group length to be %zu, but "
                    "got %zu\n",
                    expect_captures_len, captures_len);
        }
        passed = false;
        goto done;
    }
    int32_t expect_capture_index = 2;
    int32_t capture_index = rure_capture_name_index(re, "snowman");
    if (capture_index != expect_capture_index) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_captures] "
                    "expected capture index %d for name 'snowman', but "
                    "got %d\n",
                    expect_capture_index, capture_index);
        }
        passed = false;
        goto done;
    }
    size_t expect_start = 9;
    size_t expect_end = 12;
    /* `match` was zero-initialised, so it holds (0, 0) unless overwritten. */
    rure_captures_at(caps, 2, &match);
    if (match.start != expect_start || match.end != expect_end) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_captures] "
                    "expected capture 2 match at (%zu, %zu), "
                    "but got match at (%zu, %zu)\n",
                    expect_start, expect_end, match.start, match.end);
        }
        passed = false;
    }
done:
    rure_captures_free(caps);
    rure_free(re);
    return passed;
}
/*
 * Exercises the match iterator on "abc xyz". The first step uses
 * rure_iter_next and expects the match at bytes (0, 3). The second step uses
 * rure_iter_next_captures and expects capture group 1 at bytes (6, 7).
 *
 * Returns true only if both steps produce the expected locations.
 * Diagnostics are written to stderr when DEBUG is enabled.
 */
bool test_iter() {
    const char *text = "abc xyz";
    const uint8_t *haystack = (const uint8_t *)text;
    size_t haystack_len = strlen(text);
    size_t expect_start = 0;
    size_t expect_end = 3;
    /* Assume failure; flipped to true only once every check has passed. */
    bool passed = false;
    rure_match match = {0};

    rure *re = rure_compile_must("\\w+(\\w)");
    rure_captures *caps = rure_captures_new(re);
    rure_iter *it = rure_iter_new(re);

    /* Step 1: plain match. */
    if (!rure_iter_next(it, haystack, haystack_len, &match)) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_iter] expected first match, but got no match\n");
        }
        goto done;
    }
    if (!(match.start == expect_start && match.end == expect_end)) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_iter] expected first match at (%zu, %zu), but "
                    "got match at (%zu, %zu)\n",
                    expect_start, expect_end, match.start, match.end);
        }
        goto done;
    }

    /* Step 2: match with capture locations; inspect group 1. */
    if (!rure_iter_next_captures(it, haystack, haystack_len, caps)) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_iter] expected second match, but got no match\n");
        }
        goto done;
    }
    rure_captures_at(caps, 1, &match);
    expect_start = 6;
    expect_end = 7;
    if (!(match.start == expect_start && match.end == expect_end)) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_iter] expected second match at (%zu, %zu), but "
                    "got match at (%zu, %zu)\n",
                    expect_start, expect_end, match.start, match.end);
        }
        goto done;
    }

    passed = true;

done:
    rure_iter_free(it);
    rure_captures_free(caps);
    rure_free(re);
    return passed;
}
/*
 * Compares a capture group name against the expected one.
 *
 * Returns true when the two strings are equal. Otherwise it returns false
 * and, when DEBUG is enabled, reports both names on stderr.
 */
bool test_iter_capture_name(char *expect, char *given) {
    if (strcmp(expect, given) == 0) {
        return true;
    }
    if (DEBUG) {
        fprintf(stderr,
                "[test_iter_capture_name] expected first capture "
                "name '%s' got '%s'\n",
                expect, given);
    }
    return false;
}
/*
 * Walks the capture-name iterator of a date pattern and checks that, after
 * the first entry, the names come back as "year", "month" and "day" in that
 * order.
 *
 * The return value of every rure_iter_capture_names_next call is checked, so
 * an iterator that ends early is reported as such rather than as a name
 * mismatch against a stale pointer.
 *
 * Returns true if all names are produced and match.
 */
bool test_iter_capture_names() {
    bool passed = true;
    char *name;
    char *expected_names[] = { "year", "month", "day" };
    size_t expected_count = sizeof(expected_names) / sizeof(expected_names[0]);
    size_t i;

    rure *re = rure_compile_must(
        "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})");
    rure_iter_capture_names *it = rure_iter_capture_names_new(re);

    /*
     * The first entry precedes "year" and its value is not inspected.
     * NOTE(review): presumably this is the unnamed whole-match group 0;
     * confirm against the library.
     */
    if (!rure_iter_capture_names_next(it, &name)) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_iter_capture_names] expected a first name, "
                    "but got none\n");
        }
        passed = false;
        goto done;
    }

    for (i = 0; i < expected_count; ++i) {
        if (!rure_iter_capture_names_next(it, &name)) {
            if (DEBUG) {
                fprintf(stderr,
                        "[test_iter_capture_names] expected name '%s', "
                        "but got none\n",
                        expected_names[i]);
            }
            passed = false;
            goto done;
        }
        passed = test_iter_capture_name(expected_names[i], name);
        if (!passed) {
            goto done;
        }
    }

done:
    rure_iter_capture_names_free(it);
    rure_free(re);
    return passed;
}
/*
 * This tests whether we can set the flags correctly. In this case, we disable
 * all flags, which includes disabling Unicode mode. When we disable Unicode
 * mode, we can match arbitrary possibly invalid UTF-8 bytes, such as \xFF.
 * (When Unicode mode is enabled, \xFF won't match .)
 *
 * Returns true if the pattern compiles and matches the single \xFF byte.
 */
bool test_flags() {
    bool passed = true;
    const char *pattern = ".";
    const char *haystack = "\xFF";
    rure *re = rure_compile((const uint8_t *)pattern, strlen(pattern),
                            0, NULL, NULL);
    /*
     * rure_compile returns NULL on error; report that as a test failure
     * instead of handing a NULL regex to rure_is_match.
     */
    if (re == NULL) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_flags] expected pattern to compile, "
                    "but got NULL regex pointer\n");
        }
        return false;
    }
    bool matched = rure_is_match(re, (const uint8_t *)haystack,
                                 strlen(haystack), 0);
    if (!matched) {
        if (DEBUG) {
            fprintf(stderr, "[test_flags] expected match, but got no match\n");
        }
        passed = false;
    }
    rure_free(re);
    return passed;
}
/*
 * Compiles the invalid pattern "(" and checks that compilation fails, i.e.
 * that rure_compile returns NULL and that the error message mentions
 * "unclosed group".
 *
 * Returns true if both conditions hold.
 */
bool test_compile_error() {
    bool passed = true;
    rure_error *err = rure_error_new();
    rure *re = rure_compile((const uint8_t *)"(", 1, 0, NULL, err);
    if (re != NULL) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_compile_error] "
                    "expected NULL regex pointer, but got non-NULL pointer\n");
        }
        passed = false;
        rure_free(re);
    }
    const char *msg = rure_error_message(err);
    if (NULL == strstr(msg, "unclosed group")) {
        if (DEBUG) {
            /* The diagnostic names the same substring that is searched for. */
            fprintf(stderr,
                    "[test_compile_error] "
                    "expected an 'unclosed group' error message, but "
                    "got this instead: '%s'\n", msg);
        }
        passed = false;
    }
    rure_error_free(err);
    return passed;
}
/*
 * Sets the compiled-program size limit to 0 and checks that compiling
 * \w{100} then fails, i.e. that rure_compile returns NULL and that the error
 * message mentions "exceeds size".
 *
 * Returns true if both conditions hold.
 */
bool test_compile_error_size_limit() {
    bool passed = true;
    const char *pattern = "\\w{100}";
    rure_options *opts = rure_options_new();
    rure_options_size_limit(opts, 0);
    rure_error *err = rure_error_new();
    /*
     * The pattern is 7 bytes long. Use strlen rather than a hand-counted
     * length so the NUL terminator is not passed as part of the pattern.
     */
    rure *re = rure_compile((const uint8_t *)pattern, strlen(pattern),
                            0, opts, err);
    if (re != NULL) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_compile_error_size_limit] "
                    "expected NULL regex pointer, but got non-NULL pointer\n");
        }
        passed = false;
        rure_free(re);
    }
    const char *msg = rure_error_message(err);
    if (NULL == strstr(msg, "exceeds size")) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_compile_error_size_limit] "
                    "expected an 'exceeds size' error message, but "
                    "got this instead: '%s'\n", msg);
        }
        passed = false;
    }
    rure_options_free(opts);
    rure_error_free(err);
    return passed;
}
/*
 * Compiles a set of six patterns and checks, against the haystack "foobar",
 * the set length, rure_set_is_match on a matching and on an empty haystack,
 * and the per-pattern results filled in by rure_set_matches.
 *
 * Returns true only if every check passes.
 */
bool test_regex_set_matches() {
#define PAT_COUNT 6
    const char *patterns[] = {
        "foo", "barfoo", "\\w+", "\\d+", "foobar", "bar"
    };
    const size_t patterns_lengths[] = {
        3, 6, 3, 3, 6, 3
    };
    /* Expected per-pattern outcome for "foobar", in pattern order. */
    const bool match_target[] = {
        true, false, true, false, true, true
    };
    bool matches[PAT_COUNT];
    /* Assume failure; flipped to true only once every check has passed. */
    bool passed = false;
    int idx;

    rure_error *err = rure_error_new();
    rure_set *re = rure_compile_set((const uint8_t **) patterns,
                                    patterns_lengths,
                                    PAT_COUNT,
                                    0,
                                    NULL,
                                    err);
    if (re == NULL) {
        /* Nothing to free except the error object. */
        goto free_error;
    }
    if (rure_set_len(re) != PAT_COUNT) {
        goto free_set;
    }
    if (!rure_set_is_match(re, (const uint8_t *) "foobar", 6, 0)) {
        goto free_set;
    }
    if (rure_set_is_match(re, (const uint8_t *) "", 0, 0)) {
        goto free_set;
    }
    if (!rure_set_matches(re, (const uint8_t *) "foobar", 6, 0, matches)) {
        goto free_set;
    }
    for (idx = 0; idx < PAT_COUNT; ++idx) {
        if (matches[idx] != match_target[idx]) {
            goto free_set;
        }
    }
    passed = true;

free_set:
    rure_set_free(re);
free_error:
    rure_error_free(err);
    return passed;
#undef PAT_COUNT
}
/*
 * Checks that the start offset passed to the regex-set search functions is
 * honoured. It does so by comparing results for searches that begin at
 * offset 0 with searches that begin later in the haystack.
 *
 * Returns true only if every check passes.
 */
bool test_regex_set_match_start() {
#define PAT_COUNT 3
    bool passed = true;
    const char *patterns[] = {
        "foo", "bar", "fooo"
    };
    const size_t patterns_lengths[] = {
        3, 3, 4
    };
    rure_error *err = rure_error_new();
    rure_set *re = rure_compile_set((const uint8_t **) patterns,
                                    patterns_lengths,
                                    PAT_COUNT,
                                    0,
                                    NULL,
                                    err);
    if (re == NULL) {
        passed = false;
        goto done2;
    }
    if (rure_set_len(re) != PAT_COUNT) {
        passed = false;
        goto done1;
    }

    /*
     * Only the first 7 bytes ("foobias") are searched, starting at offset 2.
     * No pattern is expected to match there.
     */
    if (rure_set_is_match(re, (const uint8_t *)"foobiasdr", 7, 2)) {
        passed = false;
        goto done1;
    }

    /* Searching from offset 0: all three patterns are expected to match. */
    {
        bool matches[PAT_COUNT];
        /*
         * "fooobar" is 7 bytes long; a length of 8 would include the NUL
         * terminator in the haystack.
         */
        if (!rure_set_matches(re, (const uint8_t *)"fooobar", 7, 0, matches)) {
            passed = false;
            goto done1;
        }
        const bool match_target[] = {
            true, true, true
        };
        int i;
        for (i = 0; i < PAT_COUNT; ++i) {
            if (matches[i] != match_target[i]) {
                passed = false;
                goto done1;
            }
        }
    }

    /* Searching from offset 1: only "bar" is expected to match. */
    {
        bool matches[PAT_COUNT];
        if (!rure_set_matches(re, (const uint8_t *)"fooobar", 7, 1, matches)) {
            passed = false;
            goto done1;
        }
        const bool match_target[] = {
            false, true, false
        };
        int i;
        for (i = 0; i < PAT_COUNT; ++i) {
            if (matches[i] != match_target[i]) {
                passed = false;
                goto done1;
            }
        }
    }

done1:
    rure_set_free(re);
done2:
    rure_error_free(err);
    return passed;
#undef PAT_COUNT
}
/*
 * Checks that options are honoured when compiling a regex set. With the size
 * limit set to 0, compiling \w{100} must fail, i.e. rure_compile_set returns
 * NULL and the error message mentions "exceeds size".
 *
 * Returns true if both conditions hold.
 */
bool test_regex_set_options() {
    bool passed = true;
    rure_options *opts = rure_options_new();
    rure_options_size_limit(opts, 0);
    rure_error *err = rure_error_new();
    const char *patterns[] = { "\\w{100}" };
    /*
     * The pattern is 7 bytes long. Use strlen rather than a hand-counted
     * length so the NUL terminator is not passed as part of the pattern.
     */
    const size_t patterns_lengths[] = { strlen(patterns[0]) };
    rure_set *re = rure_compile_set(
        (const uint8_t **) patterns, patterns_lengths, 1, 0, opts, err);
    if (re != NULL) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_regex_set_options] "
                    "expected NULL regex pointer, but got non-NULL pointer\n");
        }
        passed = false;
        rure_set_free(re);
    }
    const char *msg = rure_error_message(err);
    if (NULL == strstr(msg, "exceeds size")) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_regex_set_options] "
                    "expected an 'exceeds size' error message, but "
                    "got this instead: '%s'\n", msg);
        }
        passed = false;
    }
    rure_options_free(opts);
    rure_error_free(err);
    return passed;
}
/*
 * Checks that rure_escape_must escapes every regex meta character in
 * "^[a-z]+.*$".
 *
 * Returns true if the escaped string equals the expected one.
 */
bool test_escape() {
    bool passed = true;
    const char *pattern = "^[a-z]+.*$";
    const char *expected_escaped = "\\^\\[a\\-z\\]\\+\\.\\*\\$";
    const char *escaped = rure_escape_must(pattern);
    if (!escaped) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_escape] expected escaped, but got no escaped\n");
        }
        passed = false;
    } else if (strcmp(escaped, expected_escaped) != 0) {
        if (DEBUG) {
            fprintf(stderr,
                    "[test_escape] expected \"%s\", but got \"%s\"\n",
                    expected_escaped, escaped);
        }
        passed = false;
    }
    /* Only hand a real string back to the library; never pass NULL. */
    if (escaped) {
        rure_cstring_free((char *) escaped);
    }
    return passed;
}
/*
 * Runs a single test function and reports the outcome on stderr.
 *
 * test:   the test to run; returns true on success.
 * name:   label printed next to the PASSED/FAILED verdict.
 * passed: set to false when the test fails and left untouched otherwise, so
 *         one flag can accumulate the result of several runs.
 */
void run_test(bool (test)(), const char *name, bool *passed) {
    bool ok = test();
    if (ok) {
        fprintf(stderr, "PASSED: %s\n", name);
        return;
    }
    *passed = false;
    fprintf(stderr, "FAILED: %s\n", name);
}
/*
 * Test driver: runs every test in turn through run_test, which prints a
 * PASSED/FAILED line per test.
 *
 * Returns 0 if all tests passed and 1 if at least one failed.
 */
int main() {
    bool passed = true;
    run_test(test_is_match, "test_is_match", &passed);
    run_test(test_shortest_match, "test_shortest_match", &passed);
    run_test(test_find, "test_find", &passed);
    run_test(test_captures, "test_captures", &passed);
    run_test(test_iter, "test_iter", &passed);
    run_test(test_iter_capture_names, "test_iter_capture_names", &passed);
    run_test(test_flags, "test_flags", &passed);
    run_test(test_compile_error, "test_compile_error", &passed);
    run_test(test_compile_error_size_limit, "test_compile_error_size_limit",
             &passed);
    /* The label matches the function name so a failure is easy to find. */
    run_test(test_regex_set_matches, "test_regex_set_matches", &passed);
    run_test(test_regex_set_options, "test_regex_set_options", &passed);
    run_test(test_regex_set_match_start, "test_regex_set_match_start",
             &passed);
    run_test(test_escape, "test_escape", &passed);
    return passed ? 0 : 1;
}

9
third_party/rust/rure/examples/compile поставляемый
Просмотреть файл

@ -1,9 +0,0 @@
#!/bin/sh
# Runs `cargo build` on ../Cargo.toml, then compiles iter.c into ./iter and
# links it against librure from ../../target/debug.

# Stop at the first failing command and echo each command as it runs.
set -e -x

# N.B. Add `--release` flag to `cargo build` to make the example run faster.
cargo build --manifest-path ../Cargo.toml

gcc -O3 -DDEBUG -o iter iter.c \
    -ansi -Wall \
    -I../include -L../../target/debug \
    -lrure
# If you're using librure.a, then you'll need to link other stuff:
# -lutil -ldl -lpthread -lgcc_s -lc -lm -lrt -lutil -lrure

99
third_party/rust/rure/examples/iter.c поставляемый
Просмотреть файл

@ -1,99 +0,0 @@
/*
* This example code shows how to iterate over all regex matches in a file,
* emit the match location and print the contents of a capturing group.
*/
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "rure.h"
/*
 * Memory-maps sherlock.txt, searches it for "(\w+)\s+Holmes" (Unicode, case
 * insensitive) and prints, for every match, the text of capture group 1
 * together with the byte span of the whole match.
 *
 * Returns 0 on success. Exits with status 1 if the file cannot be opened,
 * inspected or mapped, if it is empty or too large, or if the pattern fails
 * to compile.
 */
int main() {
    /* Open a file and mmap it. */
    int fd = open("sherlock.txt", O_RDONLY);
    if (fd == -1) {
        perror("failed to open sherlock.txt");
        exit(1);
    }
    struct stat status;
    if (fstat(fd, &status) == -1) {
        perror("failed to stat sherlock.txt");
        exit(1);
    }
    /*
     * The two size checks below are not failed system calls, so errno carries
     * no useful information here. Report them with fprintf rather than
     * perror, which would append an unrelated errno description.
     */
    if ((uintmax_t)status.st_size > SIZE_MAX) {
        fprintf(stderr, "file too big\n");
        exit(1);
    }
    if (status.st_size == 0) {
        fprintf(stderr, "file empty\n");
        exit(1);
    }
    size_t sherlock_len = (size_t)status.st_size;
    const uint8_t *sherlock = (const uint8_t *)mmap(
        NULL, status.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd);
    if (sherlock == MAP_FAILED) {
        perror("could not mmap file");
        exit(1);
    }

    /*
     * Compile the regular expression. A more convenient routine,
     * rure_compile_must, is also available, which will print an error
     * message to stderr and abort the process if the regex compilation
     * fails. We show the full gory details here as an example.
     */
    const char *pattern = "(\\w+)\\s+Holmes";
    size_t pattern_len = strlen(pattern);
    rure_error *err = rure_error_new();
    rure *re = rure_compile((const uint8_t *)pattern, pattern_len,
                            RURE_FLAG_UNICODE | RURE_FLAG_CASEI, NULL, err);
    if (NULL == re) {
        /* A null regex means compilation failed and an error exists. */
        fprintf(stderr, "compilation of %s failed: %s\n",
                pattern, rure_error_message(err));
        rure_error_free(err);
        munmap((char*)sherlock, sherlock_len);
        exit(1);
    }
    rure_error_free(err);

    /*
     * Create an iterator to find all successive non-overlapping matches.
     * For each match, we extract the location of the capturing group.
     */
    rure_match group0 = {0};
    rure_match group1 = {0};
    rure_captures *caps = rure_captures_new(re);
    rure_iter *it = rure_iter_new(re);
    while (rure_iter_next_captures(it, sherlock, sherlock_len, caps)) {
        /*
         * Get the location of the full match and the capturing group.
         * We know that both accesses are successful since the body of the
         * loop only executes if there is a match and both capture groups
         * must match in order for the entire regex to match.
         *
         * N.B. The zeroth group corresponds to the full match of the regex.
         */
        rure_captures_at(caps, 0, &group0);
        rure_captures_at(caps, 1, &group1);
        /* %.*s prints exactly the bytes of group 1; the data is not NUL terminated. */
        printf("%.*s (match at: %zu, %zu)\n",
               (int)(group1.end - group1.start),
               sherlock + group1.start,
               group0.start, group0.end);
    }

    /* Free all our resources. */
    munmap((char*)sherlock, sherlock_len);
    rure_captures_free(caps);
    rure_iter_free(it);
    rure_free(re);
    return 0;
}

13052
third_party/rust/rure/examples/sherlock.txt поставляемый

Разница между файлами не показана из-за своего большого размера Загрузить разницу

585
third_party/rust/rure/include/rure.h поставляемый
Просмотреть файл

@ -1,585 +0,0 @@
#ifndef _RURE_H
#define _RURE_H
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* rure is the type of a compiled regular expression.
*
* An rure can be safely used from multiple threads simultaneously.
*/
typedef struct rure rure;
/*
* rure_set is the type of a set of compiled regular expressions.
*
* An rure_set can be safely used from multiple threads simultaneously.
*/
typedef struct rure_set rure_set;
/*
* rure_options is the set of non-flag configuration options for compiling
* a regular expression. Currently, only two options are available: setting
* the size limit of the compiled program and setting the size limit of the
* cache of states that the DFA uses while searching.
*
* For most uses, the default settings will work fine, and NULL can be passed
* wherever a *rure_options is expected.
*/
typedef struct rure_options rure_options;
/*
* The flags listed below can be used in rure_compile to set the default
* flags. All flags can otherwise be toggled in the expression itself using
* standard syntax, e.g., `(?i)` turns case insensitive matching on and `(?-i)`
* disables it.
*/
/* The case insensitive (i) flag. */
#define RURE_FLAG_CASEI (1 << 0)
/* The multi-line matching (m) flag. (^ and $ match new line boundaries.) */
#define RURE_FLAG_MULTI (1 << 1)
/* The any character (s) flag. (. matches new line.) */
#define RURE_FLAG_DOTNL (1 << 2)
/* The greedy swap (U) flag. (e.g., + is ungreedy and +? is greedy.) */
#define RURE_FLAG_SWAP_GREED (1 << 3)
/* The ignore whitespace (x) flag. */
#define RURE_FLAG_SPACE (1 << 4)
/* The Unicode (u) flag. */
#define RURE_FLAG_UNICODE (1 << 5)
/* The default set of flags enabled when no flags are set. */
#define RURE_DEFAULT_FLAGS RURE_FLAG_UNICODE
/*
* rure_match corresponds to the location of a single match in a haystack.
*
* Both positions are offsets into the haystack, measured in bytes.
* NOTE(review): end appears to be exclusive, i.e. end - start is the length
* of the match in bytes; confirm against the library documentation.
*/
typedef struct rure_match {
/* The start position, as a byte offset. */
size_t start;
/* The end position, as a byte offset. */
size_t end;
} rure_match;
/*
* rure_captures represents storage for sub-capture locations of a match.
*
* Computing the capture groups of a match can carry a significant performance
* penalty, so their use in the API is optional.
*
* An rure_captures value can be reused in multiple calls to rure_find_captures,
* so long as it is used with the compiled regular expression that created
* it.
*
* An rure_captures value may outlive its corresponding rure and can be freed
* independently.
*
* It is not safe to use from multiple threads simultaneously.
*/
typedef struct rure_captures rure_captures;
/*
* rure_iter is an iterator over successive non-overlapping matches in a
* particular haystack.
*
* An rure_iter value may not outlive its corresponding rure and should be freed
* before its corresponding rure is freed.
*
* It is not safe to use from multiple threads simultaneously.
*/
typedef struct rure_iter rure_iter;
/*
* rure_iter_capture_names is an iterator over the list of capture group names
* in this particular rure.
*
* An rure_iter_capture_names value may not outlive its corresponding rure,
* and should be freed before its corresponding rure is freed.
*
* It is not safe to use from multiple threads simultaneously.
*/
typedef struct rure_iter_capture_names rure_iter_capture_names;
/*
* rure_error is an error that caused compilation to fail.
*
* Most errors are syntax errors but an error can be returned if the compiled
* regular expression would be too big.
*
* Whenever a function accepts an *rure_error, it is safe to pass NULL. (But
* you will not get access to the error if one occurred.)
*
* It is not safe to use from multiple threads simultaneously.
*/
typedef struct rure_error rure_error;
/*
* rure_compile_must compiles the given pattern into a regular expression. If
* compilation fails for any reason, an error message is printed to stderr and
* the process is aborted.
*
* The pattern given should be in UTF-8. For convenience, this accepts a C
* string, which means the pattern cannot usefully contain NUL. If your pattern
* may contain NUL, consider using a regular expression escape sequence, or
* just use rure_compile.
*
* This uses RURE_DEFAULT_FLAGS.
*
* The compiled expression returned may be used from multiple threads
* simultaneously.
*/
rure *rure_compile_must(const char *pattern);
/*
* rure_compile compiles the given pattern into a regular expression. The
* pattern must be valid UTF-8 and the length corresponds to the number of
* bytes in the pattern.
*
* flags is a bitfield. Valid values are constants declared with prefix
* RURE_FLAG_.
*
* options contains non-flag configuration settings. If it's NULL, default
* settings are used. options may be freed immediately after a call to
* rure_compile.
*
* error is set if there was a problem compiling the pattern (including if the
* pattern is not valid UTF-8). If error is NULL, then no error information
* is returned. In all cases, if an error occurs, NULL is returned.
*
* The compiled expression returned may be used from multiple threads
* simultaneously.
*/
rure *rure_compile(const uint8_t *pattern, size_t length,
uint32_t flags, rure_options *options,
rure_error *error);
/*
* rure_free frees the given compiled regular expression.
*
* This must be called at most once for any rure.
*/
void rure_free(rure *re);
/*
* rure_is_match returns true if and only if re matches anywhere in haystack.
*
* haystack may contain arbitrary bytes, but ASCII compatible text is more
* useful. UTF-8 is even more useful. Other text encodings aren't supported.
* length should be the number of bytes in haystack.
*
* start is the position at which to start searching. Note that setting the
* start position is distinct from incrementing the pointer, since the regex
* engine may look at bytes before the start position to determine match
* information. For example, if the start position is greater than 0, then the
* \A ("begin text") anchor can never match.
*
* rure_is_match should be preferred to rure_find since it may be faster.
*
* N.B. The performance of this search is not impacted by the presence of
* capturing groups in your regular expression.
*/
bool rure_is_match(rure *re, const uint8_t *haystack, size_t length,
size_t start);
/*
* rure_find returns true if and only if re matches anywhere in haystack.
* If a match is found, then its start and end offsets (in bytes) are set
* on the match pointer given.
*
* haystack may contain arbitrary bytes, but ASCII compatible text is more
* useful. UTF-8 is even more useful. Other text encodings aren't supported.
* length should be the number of bytes in haystack.
*
* start is the position at which to start searching. Note that setting the
* start position is distinct from incrementing the pointer, since the regex
* engine may look at bytes before the start position to determine match
* information. For example, if the start position is greater than 0, then the
* \A ("begin text") anchor can never match.
*
* rure_find should be preferred to rure_find_captures since it may be faster.
*
* N.B. The performance of this search is not impacted by the presence of
* capturing groups in your regular expression.
*/
bool rure_find(rure *re, const uint8_t *haystack, size_t length,
size_t start, rure_match *match);
/*
* rure_find_captures returns true if and only if re matches anywhere in
* haystack. If a match is found, then all of its capture locations are stored
* in the captures pointer given.
*
* haystack may contain arbitrary bytes, but ASCII compatible text is more
* useful. UTF-8 is even more useful. Other text encodings aren't supported.
* length should be the number of bytes in haystack.
*
* start is the position at which to start searching. Note that setting the
* start position is distinct from incrementing the pointer, since the regex
* engine may look at bytes before the start position to determine match
* information. For example, if the start position is greater than 0, then the
* \A ("begin text") anchor can never match.
*
* Only use this function if you specifically need access to capture locations.
* It is not necessary to use this function just because your regular
* expression contains capturing groups.
*
* Capture locations can be accessed using the rure_captures_* functions.
*
* N.B. The performance of this search can be impacted by the number of
* capturing groups. If you're using this function, it may be beneficial to
* use non-capturing groups (e.g., `(?:re)`) where possible.
*/
bool rure_find_captures(rure *re, const uint8_t *haystack, size_t length,
size_t start, rure_captures *captures);
/*
* rure_shortest_match returns true if and only if re matches anywhere in
* haystack. If a match is found, then its end location is stored in the
* pointer given. The end location is the place at which the regex engine
* determined that a match exists, but may occur before the end of the proper
* leftmost-first match.
*
* haystack may contain arbitrary bytes, but ASCII compatible text is more
* useful. UTF-8 is even more useful. Other text encodings aren't supported.
* length should be the number of bytes in haystack.
*
* start is the position at which to start searching. Note that setting the
* start position is distinct from incrementing the pointer, since the regex
* engine may look at bytes before the start position to determine match
* information. For example, if the start position is greater than 0, then the
* \A ("begin text") anchor can never match.
*
* rure_shortest_match should be preferred to rure_find since it may be faster.
*
* N.B. The performance of this search is not impacted by the presence of
* capturing groups in your regular expression.
*/
bool rure_shortest_match(rure *re, const uint8_t *haystack, size_t length,
size_t start, size_t *end);
/*
* rure_capture_name_index returns the capture index for the name given. If
* no such named capturing group exists in re, then -1 is returned.
*
* The capture index may be used with rure_captures_at.
*
* This function never returns 0 since the first capture group always
* corresponds to the entire match and is always unnamed.
*/
int32_t rure_capture_name_index(rure *re, const char *name);
/*
* rure_iter_capture_names_new creates a new capture_names iterator.
*
* An iterator will report all successive capture group names of re.
*/
rure_iter_capture_names *rure_iter_capture_names_new(rure *re);
/*
* rure_iter_capture_names_free frees the iterator given.
*
* It must be called at most once.
*/
void rure_iter_capture_names_free(rure_iter_capture_names *it);
/*
* rure_iter_capture_names_next advances the iterator and returns true
* if and only if another capture group name exists.
*
* The value of the capture group name is written to the provided pointer.
*/
bool rure_iter_capture_names_next(rure_iter_capture_names *it, char **name);
/*
* rure_iter_new creates a new iterator.
*
* An iterator will report all successive non-overlapping matches of re.
* When calling iterator functions, the same haystack and length must be
* supplied to all invocations. (Strict pointer equality is, however, not
* required.)
*/
rure_iter *rure_iter_new(rure *re);
/*
* rure_iter_free frees the iterator given.
*
* It must be called at most once.
*/
void rure_iter_free(rure_iter *it);
/*
* rure_iter_next advances the iterator and returns true if and only if a
* match was found. If a match is found, then the match pointer is set with the
* start and end location of the match, in bytes.
*
* If no match is found, then subsequent calls will return false indefinitely.
*
* haystack may contain arbitrary bytes, but ASCII compatible text is more
* useful. UTF-8 is even more useful. Other text encodings aren't supported.
* length should be the number of bytes in haystack. The given haystack must
* be logically equivalent to all other haystacks given to this iterator.
*
* rure_iter_next should be preferred to rure_iter_next_captures since it may
* be faster.
*
* N.B. The performance of this search is not impacted by the presence of
* capturing groups in your regular expression.
*/
bool rure_iter_next(rure_iter *it, const uint8_t *haystack, size_t length,
rure_match *match);
/*
* rure_iter_next_captures advances the iterator and returns true if and only if a
* match was found. If a match is found, then all of its capture locations are
* stored in the captures pointer given.
*
* If no match is found, then subsequent calls will return false indefinitely.
*
* haystack may contain arbitrary bytes, but ASCII compatible text is more
* useful. UTF-8 is even more useful. Other text encodings aren't supported.
* length should be the number of bytes in haystack. The given haystack must
* be logically equivalent to all other haystacks given to this iterator.
*
* Only use this function if you specifically need access to capture locations.
* It is not necessary to use this function just because your regular
* expression contains capturing groups.
*
* Capture locations can be accessed using the rure_captures_* functions.
*
* N.B. The performance of this search can be impacted by the number of
* capturing groups. If you're using this function, it may be beneficial to
* use non-capturing groups (e.g., `(?:re)`) where possible.
*/
bool rure_iter_next_captures(rure_iter *it,
const uint8_t *haystack, size_t length,
rure_captures *captures);
/*
* rure_captures_new allocates storage for all capturing groups in re.
*
* An rure_captures value may be reused on subsequent calls to
* rure_find_captures or rure_iter_next_captures.
*
* An rure_captures value may be freed independently of re, although any
* particular rure_captures should be used only with the re given here.
*
* It is not safe to use an rure_captures value from multiple threads
* simultaneously.
*/
rure_captures *rure_captures_new(rure *re);
/*
* rure_captures_free frees the given captures.
*
* This must be called at most once.
*/
void rure_captures_free(rure_captures *captures);
/*
* rure_captures_at returns true if and only if the capturing group at the
* index given was part of a match. If so, the given match pointer is populated
* with the start and end location (in bytes) of the capturing group.
*
* If no capture group with the index i exists, then false is
* returned. (A capturing group exists if and only if i is less than
* rure_captures_len(captures).)
*
* Note that index 0 corresponds to the full match.
*/
bool rure_captures_at(rure_captures *captures, size_t i, rure_match *match);
/*
* rure_captures_len returns the number of capturing groups in the given
* captures.
*/
size_t rure_captures_len(rure_captures *captures);
/*
* rure_options_new allocates space for options.
*
* Options may be freed immediately after a call to rure_compile, but otherwise
* may be freely used in multiple calls to rure_compile.
*
* It is not safe to set options from multiple threads simultaneously. It is
* safe to call rure_compile from multiple threads simultaneously using the
* same options pointer.
*/
rure_options *rure_options_new();
/*
* rure_options_free frees the given options.
*
* This must be called at most once.
*/
void rure_options_free(rure_options *options);
/*
* rure_options_size_limit sets the approximate size limit of the compiled
* regular expression.
*
* This size limit roughly corresponds to the number of bytes occupied by a
* single compiled program. If the program would exceed this number, then a
* compilation error will be returned from rure_compile.
*/
void rure_options_size_limit(rure_options *options, size_t limit);
/*
* rure_options_dfa_size_limit sets the approximate size of the cache used by
* the DFA during search.
*
* This roughly corresponds to the number of bytes that the DFA will use while
* searching.
*
* Note that this is a *per thread* limit. There is no way to set a global
* limit. In particular, if a regular expression is used from multiple threads
* simultaneously, then each thread may use up to the number of bytes
* specified here.
*/
void rure_options_dfa_size_limit(rure_options *options, size_t limit);
/*
* rure_compile_set compiles the given list of patterns into a single regular
* expression which can be matched in a linear-scan. Each pattern in patterns
* must be valid UTF-8 and the length of each pattern in patterns corresponds
* to a byte length in patterns_lengths.
*
* The number of patterns to compile is specified by patterns_count. patterns
* must contain at least this many entries.
*
* flags is a bitfield. Valid values are constants declared with prefix
* RURE_FLAG_.
*
* options contains non-flag configuration settings. If it's NULL, default
* settings are used. options may be freed immediately after a call to
* rure_compile_set.
*
* error is set if there was a problem compiling the pattern.
*
* The compiled expression set returned may be used from multiple threads.
*/
rure_set *rure_compile_set(const uint8_t **patterns,
const size_t *patterns_lengths,
size_t patterns_count,
uint32_t flags,
rure_options *options,
rure_error *error);
/*
* rure_set_free frees the given compiled regular expression set.
*
* This must be called at most once for any rure_set.
*/
void rure_set_free(rure_set *re);
/*
* rure_set_is_match returns true if and only if any regex within the set
* matches anywhere in the haystack. Once a match has been located, the
* matching engine will quit immediately.
*
* haystack may contain arbitrary bytes, but ASCII compatible text is more
* useful. UTF-8 is even more useful. Other text encodings aren't supported.
* length should be the number of bytes in haystack.
*
* start is the position at which to start searching. Note that setting the
* start position is distinct from incrementing the pointer, since the regex
* engine may look at bytes before the start position to determine match
* information. For example, if the start position is greater than 0, then the
* \A ("begin text") anchor can never match.
*/
bool rure_set_is_match(rure_set *re, const uint8_t *haystack, size_t length,
size_t start);
/*
* rure_set_matches compares each regex in the set against the haystack and
* modifies matches with the match result of each pattern. Match results are
* ordered in the same way as the rure_set was compiled. For example,
* index 0 of matches corresponds to the first pattern passed to
* `rure_compile_set`.
*
* haystack may contain arbitrary bytes, but ASCII compatible text is more
* useful. UTF-8 is even more useful. Other text encodings aren't supported.
* length should be the number of bytes in haystack.
*
* start is the position at which to start searching. Note that setting the
* start position is distinct from incrementing the pointer, since the regex
* engine may look at bytes before the start position to determine match
* information. For example, if the start position is greater than 0, then the
* \A ("begin text") anchor can never match.
*
* matches must point to an array of at least as many bool values as the
* number of patterns the rure_set was compiled with.
*
* Only use this function if you specifically need to know which regexes
* matched within the set. To determine if any of the regexes matched without
* caring which, use rure_set_is_match.
*/
bool rure_set_matches(rure_set *re, const uint8_t *haystack, size_t length,
size_t start, bool *matches);
/*
* rure_set_len returns the number of patterns rure_set was compiled with.
*
* re is dereferenced unconditionally, so it must be a valid, non-NULL set.
*/
size_t rure_set_len(rure_set *re);
/*
* rure_error_new allocates space for an error.
*
* If error information is desired, then rure_error_new should be called
* to create an rure_error pointer, and that pointer can be passed to
* rure_compile. If an error occurred, then rure_compile will return NULL and
* the error pointer will be set. A message can then be extracted.
*
* It is not safe to use errors from multiple threads simultaneously. An error
* value may be reused on subsequent calls to rure_compile.
*
* The parameter list is spelled (void) so that C compilers see a real
* prototype taking no arguments; an empty () in C (before C23) declares a
* function with unspecified parameters and disables argument checking.
*/
rure_error *rure_error_new(void);
/*
* rure_error_free frees the error given.
*
* err must be a pointer previously returned by rure_error_new.
*
* This must be called at most once.
*/
void rure_error_free(rure_error *err);
/*
* rure_error_message returns a NUL terminated string that describes the error
* message.
*
* The pointer returned must not be freed. Instead, it will be freed when
* rure_error_free is called. If err is used in subsequent calls to
* rure_compile, then this pointer may change or become invalid.
*
* Every call formats a fresh message and stores it inside err, replacing the
* string produced by any earlier call on the same err. Only the most recently
* returned pointer should therefore be used. If the formatted message would
* contain a NUL byte, it is truncated just before that byte.
*/
const char *rure_error_message(rure_error *err);
/*
* rure_escape_must returns a NUL terminated string where all meta characters
* have been escaped. If escaping fails for any reason, an error message is
* printed to stderr and the process is aborted.
*
* The pattern given should be in UTF-8. For convenience, this accepts a C
* string, which means the pattern cannot contain a NUL byte. These correspond
* to the only two failure conditions of this function. That is, if the caller
* guarantees that the given pattern is valid UTF-8 and does not contain a
* NUL byte, then this is guaranteed to succeed (modulo out-of-memory errors).
*
* The pointer returned must not be freed directly. Instead, it should be freed
* by calling rure_cstring_free. Note that rure_cstring_free takes a non-const
* char *, so the returned const char * has to be cast when it is released.
*/
const char *rure_escape_must(const char *pattern);
/*
* rure_cstring_free frees the string given.
*
* s must be a string that was allocated by this library (for example the
* result of rure_escape_must); strings allocated any other way must not be
* passed here.
*
* This must be called at most once per string.
*/
void rure_cstring_free(char *s);
#ifdef __cplusplus
}
#endif
#endif

79
third_party/rust/rure/src/error.rs поставляемый
Просмотреть файл

@ -1,79 +0,0 @@
use std::ffi;
use std::ffi::CString;
use std::fmt;
use std::str;
use libc::c_char;
use regex;
// Error state handed to C callers as an opaque `*mut Error`.
#[derive(Debug)]
pub struct Error {
// C-string rendering of the error. Filled in by `rure_error_message`, which
// keeps it here so the pointer it returns stays backed by live storage.
message: Option<CString>,
// What went wrong, or `ErrorKind::None` when nothing has.
kind: ErrorKind,
}
// The failure categories an `Error` can carry.
#[derive(Debug)]
pub enum ErrorKind {
// No error has been recorded; `Error::is_err` reports false for this.
None,
// Input bytes were not valid UTF-8.
Str(str::Utf8Error),
// The regex crate reported an error.
Regex(regex::Error),
// A string contained an interior NUL and could not become a C string.
Nul(ffi::NulError),
}
impl Error {
pub fn new(kind: ErrorKind) -> Error {
Error { message: None, kind: kind }
}
pub fn is_err(&self) -> bool {
match self.kind {
ErrorKind::None => false,
ErrorKind::Str(_) | ErrorKind::Regex(_) | ErrorKind::Nul(_) => {
true
}
}
}
}
impl fmt::Display for Error {
    /// Writes "no error" for `ErrorKind::None`; otherwise forwards to the
    /// `Display` implementation of the wrapped error value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.kind {
            ErrorKind::None => write!(f, "no error"),
            ErrorKind::Str(inner) => fmt::Display::fmt(inner, f),
            ErrorKind::Regex(inner) => fmt::Display::fmt(inner, f),
            ErrorKind::Nul(inner) => fmt::Display::fmt(inner, f),
        }
    }
}
// Heap-allocates an `Error` of kind `ErrorKind::None` and hands ownership of
// it to the caller as a raw pointer (released again by `rure_error_free`).
ffi_fn! {
fn rure_error_new() -> *mut Error {
Box::into_raw(Box::new(Error::new(ErrorKind::None)))
}
}
// Releases an `Error` previously returned by `rure_error_new`.
//
// A NULL pointer is accepted and ignored, mirroring C's `free(NULL)`;
// rebuilding a `Box` from a null pointer would be undefined behaviour.
// Passing the same non-null pointer twice is still a double free.
ffi_fn! {
    fn rure_error_free(err: *mut Error) {
        if !err.is_null() {
            unsafe { drop(Box::from_raw(err)); }
        }
    }
}
// Formats the error with its `Display` impl, stores the result inside the
// `Error` as a C string and returns a pointer to that stored string. The
// stored string replaces whatever an earlier call left behind.
ffi_fn! {
    fn rure_error_message(err: *mut Error) -> *const c_char {
        let err = unsafe { &mut *err };
        let rendered = format!("{}", err);
        let cmsg = CString::new(rendered).unwrap_or_else(|nul_err| {
            // I guess this can probably happen if the regex itself has a
            // NUL, and that NUL re-occurs in the context presented by the
            // error message. In this case, just show as much as we can.
            let cut = nul_err.nul_position();
            let mut bytes = nul_err.into_vec();
            bytes.truncate(cut);
            CString::new(bytes).unwrap()
        });
        // Take the pointer before moving the CString into the struct; the
        // move does not relocate the heap buffer the pointer refers to.
        let p = cmsg.as_ptr();
        err.message = Some(cmsg);
        p
    }
}

7
third_party/rust/rure/src/lib.rs поставляемый
Просмотреть файл

@ -1,7 +0,0 @@
#[macro_use]
mod macros;
mod error;
mod rure;
pub use crate::error::*;
pub use crate::rure::*;

36
third_party/rust/rure/src/macros.rs поставляемый
Просмотреть файл

@ -1,36 +0,0 @@
// Declares a `#[no_mangle] pub extern fn` whose body runs inside
// `catch_unwind`. If the body panics, the panic message is written to stderr
// and the process is aborted instead of unwinding out of the function.
//
// The generated function also brings `io`, `Write`, `panic`,
// `AssertUnwindSafe` and `abort` into scope via `use`, which is why bodies
// passed to this macro can call `writeln!(&mut io::stderr(), ..)` and
// `abort()` without importing them themselves.
macro_rules! ffi_fn {
// Argument list with a trailing comma and an explicit return type:
// strip the comma and defer to the main arm.
(fn $name:ident($($arg:ident: $arg_ty:ty),*,) -> $ret:ty $body:block) => {
ffi_fn!(fn $name($($arg: $arg_ty),*) -> $ret $body);
};
// Main arm: emits the actual exported function.
(fn $name:ident($($arg:ident: $arg_ty:ty),*) -> $ret:ty $body:block) => {
#[no_mangle]
pub extern fn $name($($arg: $arg_ty),*) -> $ret {
use ::std::io::{self, Write};
use ::std::panic::{self, AssertUnwindSafe};
use ::libc::abort;
// The body becomes a closure, so a `return` written inside it returns
// from the closure (i.e. supplies the function's result).
match panic::catch_unwind(AssertUnwindSafe(move || $body)) {
Ok(v) => v,
Err(err) => {
// Panic payloads are usually `&str` or `String`; anything else cannot
// be rendered.
let msg = if let Some(&s) = err.downcast_ref::<&str>() {
s.to_owned()
} else if let Some(s) = err.downcast_ref::<String>() {
s.to_owned()
} else {
"UNABLE TO SHOW RESULT OF PANIC.".to_owned()
};
let _ = writeln!(
&mut io::stderr(),
"panic unwind caught, aborting: {:?}",
msg);
unsafe { abort() }
}
}
}
};
// No return type, trailing comma: treat as returning `()`.
(fn $name:ident($($arg:ident: $arg_ty:ty),*,) $body:block) => {
ffi_fn!(fn $name($($arg: $arg_ty),*) -> () $body);
};
// No return type, no trailing comma: treat as returning `()`.
(fn $name:ident($($arg:ident: $arg_ty:ty),*) $body:block) => {
ffi_fn!(fn $name($($arg: $arg_ty),*) -> () $body);
};
}

629
third_party/rust/rure/src/rure.rs поставляемый
Просмотреть файл

@ -1,629 +0,0 @@
use std::collections::HashMap;
use std::ffi::{CStr, CString};
use std::ops::Deref;
use std::ptr;
use std::slice;
use std::str;
use libc::{c_char, size_t};
use regex::bytes;
use crate::error::{Error, ErrorKind};
// Bit flags accepted in the `flags` argument of `rure_compile` and
// `rure_compile_set`. Each bit switches on the builder option noted beside it.
// -> builder.case_insensitive
const RURE_FLAG_CASEI: u32 = 1 << 0;
// -> builder.multi_line
const RURE_FLAG_MULTI: u32 = 1 << 1;
// -> builder.dot_matches_new_line
const RURE_FLAG_DOTNL: u32 = 1 << 2;
// -> builder.swap_greed
const RURE_FLAG_SWAP_GREED: u32 = 1 << 3;
// -> builder.ignore_whitespace
const RURE_FLAG_SPACE: u32 = 1 << 4;
// -> builder.unicode
const RURE_FLAG_UNICODE: u32 = 1 << 5;
// Flags used by `rure_compile_must`: only Unicode support is enabled.
const RURE_DEFAULT_FLAGS: u32 = RURE_FLAG_UNICODE;
// A compiled regex plus a lookup table for its named capture groups.
pub struct Regex {
// The compiled byte-oriented regex.
re: bytes::Regex,
// Capture group name -> capture group index, built once in `rure_compile`
// and queried by `rure_capture_name_index`.
capture_names: HashMap<String, i32>,
}
// Compile-time limits forwarded to the regex builders.
pub struct Options {
// Passed to the builder's `size_limit`.
size_limit: usize,
// Passed to the builder's `dfa_size_limit`.
dfa_size_limit: usize,
}
// The `RegexSet` is not exposed with option support or matching at an
// arbitrary position with a crate just yet. To circumvent this, we use
// the `Exec` structure directly.
//
// NOTE(review): the struct below stores a `bytes::RegexSet`, not an `Exec`,
// so the paragraph above looks stale -- confirm and update.
pub struct RegexSet {
re: bytes::RegexSet,
}
// Match span written back to C callers; `#[repr(C)]` keeps the field layout
// compatible with the C struct of the same name.
#[repr(C)]
pub struct rure_match {
// Value of the match's `start()`.
pub start: size_t,
// Value of the match's `end()`.
pub end: size_t,
}
// Capture group locations for one match; created by `rure_captures_new` and
// filled in by the `*_captures` search functions.
pub struct Captures(bytes::Locations);
// State for iterating over successive matches of one regex.
pub struct Iter {
// The regex being iterated; stored as a raw pointer and not owned here.
re: *const Regex,
// Offset at which the next search starts.
last_end: usize,
// End offset of the previously reported match, used to skip an empty
// match that immediately follows it.
last_match: Option<usize>,
}
// State for iterating over the capture group names of a regex.
pub struct IterCaptureNames {
// Underlying name iterator obtained from the regex.
capture_names: bytes::CaptureNames<'static>,
// Every C string handed out by `rure_iter_capture_names_next`, kept so that
// `rure_iter_capture_names_free` can release them.
name_ptrs: Vec<*mut c_char>,
}
impl Deref for Regex {
type Target = bytes::Regex;
fn deref(&self) -> &bytes::Regex {
&self.re
}
}
impl Deref for RegexSet {
type Target = bytes::RegexSet;
fn deref(&self) -> &bytes::RegexSet {
&self.re
}
}
impl Default for Options {
    /// Defaults: 10 MiB for `size_limit`, 2 MiB for `dfa_size_limit`.
    fn default() -> Options {
        const MIB: usize = 1 << 20;
        Options {
            size_limit: 10 * MIB,
            dfa_size_limit: 2 * MIB,
        }
    }
}
// Compiles a NUL-terminated pattern with `RURE_DEFAULT_FLAGS` and no options.
// Any compile error is printed to stderr and the process is aborted, so this
// never returns after a failed compile.
ffi_fn! {
fn rure_compile_must(pattern: *const c_char) -> *const Regex {
// Measure the C string (excluding the NUL) so it can be handed to the
// length-based `rure_compile`.
let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
let pat = pattern as *const u8;
let mut err = Error::new(ErrorKind::None);
let re = rure_compile(
pat, len, RURE_DEFAULT_FLAGS, ptr::null(), &mut err);
if err.is_err() {
// `writeln!`, `io` and `abort` are in scope through the `ffi_fn!` wrapper.
let _ = writeln!(&mut io::stderr(), "{}", err);
let _ = writeln!(
&mut io::stderr(), "aborting from rure_compile_must");
unsafe { abort() }
}
re
}
}
// Compiles `length` bytes at `pattern` into a heap-allocated `Regex`.
//
// - `flags` is a bitwise OR of the `RURE_FLAG_*` constants.
// - `options` may be null, in which case the builder's own limits are used.
// - `error` may be null; when it is not, it is overwritten with the failure
//   reason if the pattern is not valid UTF-8 or fails to build.
//
// Returns a pointer owned by the caller (release with `rure_free`), or null
// on failure.
ffi_fn! {
fn rure_compile(
pattern: *const u8,
length: size_t,
flags: u32,
options: *const Options,
error: *mut Error,
) -> *const Regex {
let pat = unsafe { slice::from_raw_parts(pattern, length) };
// The pattern itself must be UTF-8 even though matching is byte-based.
let pat = match str::from_utf8(pat) {
Ok(pat) => pat,
Err(err) => {
unsafe {
if !error.is_null() {
*error = Error::new(ErrorKind::Str(err));
}
return ptr::null();
}
}
};
let mut builder = bytes::RegexBuilder::new(pat);
if !options.is_null() {
let options = unsafe { &*options };
builder.size_limit(options.size_limit);
builder.dfa_size_limit(options.dfa_size_limit);
}
// Every option is set explicitly from its flag bit, so an unset bit turns
// the option off rather than leaving the builder default in place.
builder.case_insensitive(flags & RURE_FLAG_CASEI > 0);
builder.multi_line(flags & RURE_FLAG_MULTI > 0);
builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0);
builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0);
builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0);
builder.unicode(flags & RURE_FLAG_UNICODE > 0);
match builder.build() {
Ok(re) => {
// Precompute name -> index for the named groups; unnamed groups are
// skipped but still advance the index.
let mut capture_names = HashMap::new();
for (i, name) in re.capture_names().enumerate() {
if let Some(name) = name {
capture_names.insert(name.to_owned(), i as i32);
}
}
let re = Regex {
re: re,
capture_names: capture_names,
};
Box::into_raw(Box::new(re))
}
Err(err) => {
unsafe {
if !error.is_null() {
*error = Error::new(ErrorKind::Regex(err));
}
ptr::null()
}
}
}
}
}
// Releases a `Regex` previously returned by `rure_compile` or
// `rure_compile_must`.
//
// A NULL pointer is accepted and ignored: `rure_compile` returns NULL on
// failure, and rebuilding a `Box` from a null pointer would be undefined
// behaviour. Passing the same non-null pointer twice is still a double free.
ffi_fn! {
    fn rure_free(re: *const Regex) {
        if !re.is_null() {
            unsafe { drop(Box::from_raw(re as *mut Regex)); }
        }
    }
}
// Reports whether the regex matches in the `len` bytes at `haystack`, with
// the search starting at byte offset `start`.
// Both `re` and `haystack` are used without null checks.
ffi_fn! {
fn rure_is_match(
re: *const Regex,
haystack: *const u8,
len: size_t,
start: size_t,
) -> bool {
let re = unsafe { &*re };
let haystack = unsafe { slice::from_raw_parts(haystack, len) };
re.is_match_at(haystack, start)
}
}
// Searches the haystack from `start` and reports whether a match was found.
// When one is found and `match_info` is non-null, its start/end offsets are
// written there; a null `match_info` just skips the write.
ffi_fn! {
    fn rure_find(
        re: *const Regex,
        haystack: *const u8,
        len: size_t,
        start: size_t,
        match_info: *mut rure_match,
    ) -> bool {
        let re = unsafe { &*re };
        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
        match re.find_at(haystack, start) {
            None => false,
            Some(found) => {
                if !match_info.is_null() {
                    unsafe {
                        (*match_info).start = found.start();
                        (*match_info).end = found.end();
                    }
                }
                true
            }
        }
    }
}
// Searches the haystack from `start`, recording capture group locations into
// `captures`, and reports whether a match was found.
// `re`, `haystack` and `captures` are all used without null checks.
ffi_fn! {
fn rure_find_captures(
re: *const Regex,
haystack: *const u8,
len: size_t,
start: size_t,
captures: *mut Captures,
) -> bool {
let re = unsafe { &*re };
let haystack = unsafe { slice::from_raw_parts(haystack, len) };
// Borrow the `Locations` stored inside the caller's `Captures`.
let slots = unsafe { &mut (*captures).0 };
re.read_captures_at(slots, haystack, start).is_some()
}
}
// Runs `shortest_match_at` from `start` and reports whether it found a
// match. On success the reported offset is stored in `*end` unless `end` is
// null.
ffi_fn! {
    fn rure_shortest_match(
        re: *const Regex,
        haystack: *const u8,
        len: size_t,
        start: size_t,
        end: *mut usize,
    ) -> bool {
        let re = unsafe { &*re };
        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
        let hit = re.shortest_match_at(haystack, start);
        if let Some(pos) = hit {
            if !end.is_null() {
                unsafe {
                    *end = pos;
                }
            }
        }
        hit.is_some()
    }
}
// Maps a NUL-terminated capture group name to its index.
// Yields -1 when the name is not valid UTF-8 or is not a named group of this
// regex.
ffi_fn! {
    fn rure_capture_name_index(
        re: *const Regex,
        name: *const c_char,
    ) -> i32 {
        let re = unsafe { &*re };
        let raw_name = unsafe { CStr::from_ptr(name) };
        match raw_name.to_str() {
            Ok(key) => re.capture_names.get(key).copied().unwrap_or(-1),
            Err(_) => -1,
        }
    }
}
// Allocates an iterator over the capture group names of `re` and returns it
// as a raw pointer owned by the caller.
// The iterator borrows from the regex behind `re` (the `'static` lifetime on
// `CaptureNames` comes from dereferencing a raw pointer), so the regex has to
// stay alive for as long as the iterator is used.
ffi_fn! {
fn rure_iter_capture_names_new(
re: *const Regex,
) -> *mut IterCaptureNames {
let re = unsafe { &*re };
Box::into_raw(Box::new(IterCaptureNames {
capture_names: re.re.capture_names(),
name_ptrs: Vec::new(),
}))
}
}
// Releases a capture-name iterator together with every name string it has
// handed out through `rure_iter_capture_names_next`.
//
// A NULL pointer is accepted and ignored; dereferencing or re-boxing a null
// pointer would be undefined behaviour. Passing the same non-null pointer
// twice is still a double free.
ffi_fn! {
    fn rure_iter_capture_names_free(it: *mut IterCaptureNames) {
        if it.is_null() {
            return;
        }
        unsafe {
            // Take ownership once, then release each recorded C string
            // before the iterator itself is dropped.
            let mut owned = Box::from_raw(it);
            for ptr in owned.name_ptrs.drain(..) {
                drop(CString::from_raw(ptr));
            }
            drop(owned);
        }
    }
}
// Advances the capture-name iterator. On success the next name is stored in
// `*capture_name` as a NUL-terminated string (empty for an unnamed group) and
// true is returned. The string is also recorded in the iterator, so it is
// released by `rure_iter_capture_names_free` rather than by the caller.
// Returns false when `capture_name` is null, when the iterator is exhausted,
// or when the name cannot be turned into a C string.
ffi_fn! {
    fn rure_iter_capture_names_next(
        it: *mut IterCaptureNames,
        capture_name: *mut *mut c_char,
    ) -> bool {
        if capture_name.is_null() {
            return false;
        }
        let it = unsafe { &mut *it };
        let label = match it.capture_names.next() {
            // Top-level iterator ran out of capture groups
            None => return false,
            // inner Option didn't have a name => use the empty string
            Some(maybe_name) => maybe_name.unwrap_or(""),
        };
        let owned = match CString::new(label.as_bytes()) {
            Ok(cs) => cs,
            Err(_) => return false,
        };
        let raw = owned.into_raw();
        it.name_ptrs.push(raw);
        unsafe {
            *capture_name = raw;
        }
        true
    }
}
// Allocates a match iterator for `re`, positioned at offset 0 with no
// previous match, and returns it as a raw pointer owned by the caller.
// Only the raw `re` pointer is stored, so the regex must outlive the iterator.
ffi_fn! {
fn rure_iter_new(
re: *const Regex,
) -> *mut Iter {
Box::into_raw(Box::new(Iter {
re: re,
last_end: 0,
last_match: None,
}))
}
}
// Releases an iterator previously returned by `rure_iter_new`. The regex it
// refers to is not touched.
//
// A NULL pointer is accepted and ignored, mirroring C's `free(NULL)`;
// rebuilding a `Box` from a null pointer would be undefined behaviour.
ffi_fn! {
    fn rure_iter_free(it: *mut Iter) {
        if !it.is_null() {
            unsafe { drop(Box::from_raw(it)); }
        }
    }
}
// Finds the next match at or after the iterator's current position.
// Returns false once the position has moved past the end of the haystack or
// no further match exists. On success the iterator state is advanced and,
// if `match_info` is non-null, the match offsets are written to it.
// The same haystack is expected on every call, since the stored position is
// an offset into it.
ffi_fn! {
fn rure_iter_next(
it: *mut Iter,
haystack: *const u8,
len: size_t,
match_info: *mut rure_match,
) -> bool {
let it = unsafe { &mut *it };
let re = unsafe { &*it.re };
let text = unsafe { slice::from_raw_parts(haystack, len) };
// `last_end` can step one past the end after an empty match at the end.
if it.last_end > text.len() {
return false;
}
let (s, e) = match re.find_at(text, it.last_end) {
None => return false,
Some(m) => (m.start(), m.end()),
};
if s == e {
// This is an empty match. To ensure we make progress, start
// the next search at the smallest possible starting position
// of the next match following this one.
it.last_end += 1;
// Don't accept empty matches immediately following a match.
// Just move on to the next match.
if Some(e) == it.last_match {
return rure_iter_next(it, haystack, len, match_info);
}
} else {
it.last_end = e;
}
it.last_match = Some(e);
if !match_info.is_null() {
unsafe {
(*match_info).start = s;
(*match_info).end = e;
}
}
true
}
}
// Same stepping logic as `rure_iter_next`, but records capture group
// locations into `captures` instead of writing a `rure_match`.
// Returns false once the position has moved past the end of the haystack or
// no further match exists. `captures` is used without a null check.
ffi_fn! {
fn rure_iter_next_captures(
it: *mut Iter,
haystack: *const u8,
len: size_t,
captures: *mut Captures,
) -> bool {
let it = unsafe { &mut *it };
let re = unsafe { &*it.re };
let slots = unsafe { &mut (*captures).0 };
let text = unsafe { slice::from_raw_parts(haystack, len) };
// `last_end` can step one past the end after an empty match at the end.
if it.last_end > text.len() {
return false;
}
let (s, e) = match re.read_captures_at(slots, text, it.last_end) {
None => return false,
Some(m) => (m.start(), m.end()),
};
if s == e {
// This is an empty match. To ensure we make progress, start
// the next search at the smallest possible starting position
// of the next match following this one.
it.last_end += 1;
// Don't accept empty matches immediately following a match.
// Just move on to the next match.
if Some(e) == it.last_match {
return rure_iter_next_captures(it, haystack, len, captures);
}
} else {
it.last_end = e;
}
it.last_match = Some(e);
true
}
}
// Allocates a `Captures` sized for `re` (via `re.locations()`) and returns it
// as a raw pointer owned by the caller; release it with `rure_captures_free`.
ffi_fn! {
fn rure_captures_new(re: *const Regex) -> *mut Captures {
let re = unsafe { &*re };
let captures = Captures(re.locations());
Box::into_raw(Box::new(captures))
}
}
// Releases a `Captures` previously returned by `rure_captures_new`.
//
// A NULL pointer is accepted and ignored, mirroring C's `free(NULL)`;
// rebuilding a `Box` from a null pointer would be undefined behaviour.
ffi_fn! {
    fn rure_captures_free(captures: *const Captures) {
        if !captures.is_null() {
            unsafe { drop(Box::from_raw(captures as *mut Captures)); }
        }
    }
}
// Looks up capture group `i` in `captures`. Returns true when a position is
// recorded for that group, writing its offsets to `match_info` unless that
// pointer is null; returns false otherwise.
ffi_fn! {
    fn rure_captures_at(
        captures: *const Captures,
        i: size_t,
        match_info: *mut rure_match,
    ) -> bool {
        let locs = unsafe { &(*captures).0 };
        if let Some((start, end)) = locs.pos(i) {
            if !match_info.is_null() {
                unsafe {
                    (*match_info).start = start;
                    (*match_info).end = end;
                }
            }
            true
        } else {
            false
        }
    }
}
// Returns the `len()` of the locations stored in `captures`.
// `captures` is dereferenced without a null check.
ffi_fn! {
fn rure_captures_len(captures: *const Captures) -> size_t {
unsafe { (*captures).0.len() }
}
}
// Allocates an `Options` holding the default limits and returns it as a raw
// pointer owned by the caller; release it with `rure_options_free`.
ffi_fn! {
fn rure_options_new() -> *mut Options {
Box::into_raw(Box::new(Options::default()))
}
}
// Releases an `Options` previously returned by `rure_options_new`.
//
// A NULL pointer is accepted and ignored, mirroring C's `free(NULL)`;
// rebuilding a `Box` from a null pointer would be undefined behaviour.
ffi_fn! {
    fn rure_options_free(options: *mut Options) {
        if !options.is_null() {
            unsafe { drop(Box::from_raw(options)); }
        }
    }
}
// Stores `limit` as the `size_limit` that later compiles using these options
// pass to the regex builder. `options` is dereferenced without a null check.
ffi_fn! {
fn rure_options_size_limit(options: *mut Options, limit: size_t) {
let options = unsafe { &mut *options };
options.size_limit = limit;
}
}
// Stores `limit` as the `dfa_size_limit` that later compiles using these
// options pass to the regex builder. `options` is dereferenced without a null
// check.
ffi_fn! {
fn rure_options_dfa_size_limit(options: *mut Options, limit: size_t) {
let options = unsafe { &mut *options };
options.dfa_size_limit = limit;
}
}
// Compiles `patterns_count` patterns into a heap-allocated `RegexSet`.
//
// `patterns` and `patterns_lengths` are parallel arrays: entry i gives the
// pointer and byte length of pattern i. `flags`, `options` and `error` behave
// as in `rure_compile`: `options` and `error` may be null, and a non-null
// `error` is overwritten when a pattern is not valid UTF-8 or the set fails
// to build.
//
// Returns a pointer owned by the caller (release with `rure_set_free`), or
// null on failure.
ffi_fn! {
fn rure_compile_set(
patterns: *const *const u8,
patterns_lengths: *const size_t,
patterns_count: size_t,
flags: u32,
options: *const Options,
error: *mut Error
) -> *const RegexSet {
let (raw_pats, raw_patsl) = unsafe {
(
slice::from_raw_parts(patterns, patterns_count),
slice::from_raw_parts(patterns_lengths, patterns_count)
)
};
let mut pats = Vec::with_capacity(patterns_count);
// Validate each pattern as UTF-8; the first invalid one aborts the compile.
for (&raw_pat, &raw_patl) in raw_pats.iter().zip(raw_patsl) {
let pat = unsafe { slice::from_raw_parts(raw_pat, raw_patl) };
pats.push(match str::from_utf8(pat) {
Ok(pat) => pat,
Err(err) => {
unsafe {
if !error.is_null() {
*error = Error::new(ErrorKind::Str(err));
}
return ptr::null();
}
}
});
}
let mut builder = bytes::RegexSetBuilder::new(pats);
if !options.is_null() {
let options = unsafe { &*options };
builder.size_limit(options.size_limit);
builder.dfa_size_limit(options.dfa_size_limit);
}
// Every option is set explicitly from its flag bit, so an unset bit turns
// the option off rather than leaving the builder default in place.
builder.case_insensitive(flags & RURE_FLAG_CASEI > 0);
builder.multi_line(flags & RURE_FLAG_MULTI > 0);
builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0);
builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0);
builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0);
builder.unicode(flags & RURE_FLAG_UNICODE > 0);
match builder.build() {
Ok(re) => {
Box::into_raw(Box::new(RegexSet { re: re }))
}
Err(err) => {
unsafe {
if !error.is_null() {
*error = Error::new(ErrorKind::Regex(err))
}
ptr::null()
}
}
}
}
}
// Releases a `RegexSet` previously returned by `rure_compile_set`.
//
// A NULL pointer is accepted and ignored: `rure_compile_set` returns NULL on
// failure, and rebuilding a `Box` from a null pointer would be undefined
// behaviour. Passing the same non-null pointer twice is still a double free.
ffi_fn! {
    fn rure_set_free(re: *const RegexSet) {
        if !re.is_null() {
            unsafe { drop(Box::from_raw(re as *mut RegexSet)); }
        }
    }
}
// Reports whether any regex in the set matches in the `len` bytes at
// `haystack`, with the search starting at byte offset `start`.
// Both `re` and `haystack` are used without null checks.
ffi_fn! {
fn rure_set_is_match(
re: *const RegexSet,
haystack: *const u8,
len: size_t,
start: size_t
) -> bool {
let re = unsafe { &*re };
let haystack = unsafe { slice::from_raw_parts(haystack, len) };
re.is_match_at(haystack, start)
}
}
// Runs every regex in the set against the haystack from `start` and records
// per-pattern results in `matches`, which is treated as an array of exactly
// `re.len()` bools. Returns whatever `read_matches_at` reports.
// `re`, `haystack` and `matches` are all used without null checks.
ffi_fn! {
fn rure_set_matches(
re: *const RegexSet,
haystack: *const u8,
len: size_t,
start: size_t,
matches: *mut bool
) -> bool {
let re = unsafe { &*re };
// View the caller's buffer as one slot per compiled pattern.
let mut matches = unsafe {
slice::from_raw_parts_mut(matches, re.len())
};
let haystack = unsafe { slice::from_raw_parts(haystack, len) };
// read_matches_at isn't guaranteed to set non-matches to false
for item in matches.iter_mut() {
*item = false;
}
re.read_matches_at(&mut matches, haystack, start)
}
}
// Returns the `len()` of the wrapped regex set.
// `re` is dereferenced without a null check.
ffi_fn! {
fn rure_set_len(re: *const RegexSet) -> size_t {
unsafe { (*re).len() }
}
}
// Escapes a NUL-terminated pattern via `rure_escape` and returns the result
// as a newly allocated C string. Any failure is printed to stderr and the
// process is aborted, so this never returns after a failed escape.
ffi_fn! {
fn rure_escape_must(pattern: *const c_char) -> *const c_char {
// Measure the C string (excluding the NUL) for the length-based helper.
let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
let pat = pattern as *const u8;
let mut err = Error::new(ErrorKind::None);
let esc = rure_escape(pat, len, &mut err);
if err.is_err() {
// `writeln!`, `io` and `abort` are in scope through the `ffi_fn!` wrapper.
let _ = writeln!(&mut io::stderr(), "{}", err);
let _ = writeln!(
&mut io::stderr(), "aborting from rure_escape_must");
unsafe { abort() }
}
esc
}
}
/// A helper function that implements fallible escaping in a way that returns
/// an error if escaping failed.
///
/// This should ideally be exposed, but it needs API design work. In
/// particular, this should not return a C string, but a `const uint8_t *`
/// instead, since it may contain a NUL byte.
///
/// Returns null on failure. In that case a non-null `error` is overwritten
/// with `ErrorKind::Str` (input not valid UTF-8) or `ErrorKind::Nul` (escaped
/// text contains a NUL byte). On success the returned string comes from
/// `CString::into_raw` and is owned by the caller.
fn rure_escape(
pattern: *const u8,
length: size_t,
error: *mut Error,
) -> *const c_char {
let pat: &[u8] = unsafe { slice::from_raw_parts(pattern, length) };
let str_pat = match str::from_utf8(pat) {
Ok(val) => val,
Err(err) => unsafe {
if !error.is_null() {
*error = Error::new(ErrorKind::Str(err));
}
return ptr::null();
},
};
let esc_pat = regex::escape(str_pat);
// Converting to a C string fails if the escaped text has an interior NUL.
let c_esc_pat = match CString::new(esc_pat) {
Ok(val) => val,
Err(err) => unsafe {
if !error.is_null() {
*error = Error::new(ErrorKind::Nul(err));
}
return ptr::null();
},
};
c_esc_pat.into_raw() as *const c_char
}
// Releases a C string that was produced by `CString::into_raw` in this
// library (for example the result of `rure_escape_must`).
//
// A NULL pointer is accepted and ignored, mirroring C's `free(NULL)`;
// `CString::from_raw` on a null pointer would be undefined behaviour.
// Passing the same non-null pointer twice is still a double free.
ffi_fn! {
    fn rure_cstring_free(s: *mut c_char) {
        if !s.is_null() {
            unsafe { drop(CString::from_raw(s)); }
        }
    }
}

7
third_party/rust/rure/test поставляемый
Просмотреть файл

@ -1,7 +0,0 @@
#!/bin/sh
# Builds the crate with cargo, then compiles and runs ctest/test and
# examples/iter.
# Stop at the first command that fails.
set -e
cargo build --verbose
# Each step runs in a subshell so its `cd` does not affect the next one.
# LD_LIBRARY_PATH points the dynamic loader at ../../target/debug.
(cd ctest && ./compile && LD_LIBRARY_PATH=../../target/debug ./test)
(cd examples && ./compile && LD_LIBRARY_PATH=../../target/debug ./iter)

Просмотреть файл

@ -24,7 +24,6 @@
#include "mozIExtensionProcessScript.h"
#include "nsEscape.h"
#include "nsGkAtoms.h"
#include "nsHashKeys.h"
#include "nsIChannel.h"
#include "nsIContentPolicy.h"
#include "mozilla/dom/Document.h"
@ -63,13 +62,6 @@ static const char kDocElementInserted[] = "initial-document-element-inserted";
* ExtensionPolicyService
*****************************************************************************/
using CoreByHostMap = nsTHashMap<nsCStringASCIICaseInsensitiveHashKey,
RefPtr<extensions::WebExtensionPolicyCore>>;
static StaticMutex sCoreByHostMutex;
static StaticAutoPtr<CoreByHostMap> sCoreByHost
MOZ_GUARDED_BY(sCoreByHostMutex);
/* static */
mozIExtensionProcessScript& ExtensionPolicyService::ProcessScript() {
static nsCOMPtr<mozIExtensionProcessScript> sProcessScript;
@ -86,8 +78,6 @@ mozIExtensionProcessScript& ExtensionPolicyService::ProcessScript() {
}
/* static */ ExtensionPolicyService& ExtensionPolicyService::GetSingleton() {
MOZ_ASSERT(NS_IsMainThread());
static RefPtr<ExtensionPolicyService> sExtensionPolicyService;
if (MOZ_UNLIKELY(!sExtensionPolicyService)) {
@ -98,13 +88,6 @@ mozIExtensionProcessScript& ExtensionPolicyService::ProcessScript() {
return *sExtensionPolicyService.get();
}
/* static */
// Looks up aHost in the sCoreByHost map while holding sCoreByHostMutex.
// Returns nullptr when sCoreByHost is currently null; otherwise returns
// whatever the map's Get() yields for aHost.
RefPtr<extensions::WebExtensionPolicyCore>
ExtensionPolicyService::GetCoreByHost(const nsACString& aHost) {
StaticMutexAutoLock lock(sCoreByHostMutex);
return sCoreByHost ? sCoreByHost->Get(aHost) : nullptr;
}
ExtensionPolicyService::ExtensionPolicyService() {
mObs = services::GetObserverService();
MOZ_RELEASE_ASSERT(mObs);
@ -113,22 +96,10 @@ ExtensionPolicyService::ExtensionPolicyService() {
mDefaultCSPV3.SetIsVoid(true);
RegisterObservers();
{
StaticMutexAutoLock lock(sCoreByHostMutex);
MOZ_DIAGNOSTIC_ASSERT(!sCoreByHost,
"ExtensionPolicyService created twice?");
sCoreByHost = new CoreByHostMap();
}
}
ExtensionPolicyService::~ExtensionPolicyService() {
UnregisterWeakMemoryReporter(this);
{
StaticMutexAutoLock lock(sCoreByHostMutex);
sCoreByHost = nullptr;
}
}
bool ExtensionPolicyService::UseRemoteExtensions() const {
@ -173,11 +144,6 @@ bool ExtensionPolicyService::RegisterExtension(WebExtensionPolicy& aPolicy) {
mExtensions.InsertOrUpdate(aPolicy.Id(), RefPtr{&aPolicy});
mExtensionHosts.InsertOrUpdate(aPolicy.MozExtensionHostname(),
RefPtr{&aPolicy});
{
StaticMutexAutoLock lock(sCoreByHostMutex);
sCoreByHost->InsertOrUpdate(aPolicy.MozExtensionHostname(), aPolicy.Core());
}
return true;
}
@ -192,11 +158,6 @@ bool ExtensionPolicyService::UnregisterExtension(WebExtensionPolicy& aPolicy) {
mExtensions.Remove(aPolicy.Id());
mExtensionHosts.Remove(aPolicy.MozExtensionHostname());
{
StaticMutexAutoLock lock(sCoreByHostMutex);
sCoreByHost->Remove(aPolicy.MozExtensionHostname());
}
return true;
}

Просмотреть файл

@ -60,12 +60,6 @@ class ExtensionPolicyService final : public nsIAddonPolicyService,
return do_AddRef(&GetSingleton());
}
// Unlike the other methods on the ExtensionPolicyService, this method is
// threadsafe, and can look up a WebExtensionPolicyCore by hostname on any
// thread.
static RefPtr<extensions::WebExtensionPolicyCore> GetCoreByHost(
const nsACString& aHost);
WebExtensionPolicy* GetByID(const nsAtom* aAddonId) {
return mExtensions.GetWeak(aAddonId);
}

Просмотреть файл

@ -12,7 +12,6 @@
#include "jspubtd.h"
#include "js/RootingAPI.h"
#include "mozilla/RustRegex.h"
#include "nsCOMPtr.h"
#include "nsCycleCollectionParticipant.h"
#include "nsISupports.h"
@ -23,75 +22,57 @@ namespace extensions {
class MatchPattern;
class MatchGlobCore final {
public:
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MatchGlobCore)
MatchGlobCore(const nsACString& aGlob, bool aAllowQuestion, ErrorResult& aRv);
bool Matches(const nsACString& aString) const;
bool IsWildcard() const { return mIsPrefix && mPathLiteral.IsEmpty(); }
void GetGlob(nsACString& aGlob) const { aGlob = mGlob; }
private:
~MatchGlobCore() = default;
// The original glob string that this glob object represents.
const nsCString mGlob;
// The literal path string to match against. If this contains a non-void
// value, the glob matches against this exact literal string, rather than
// performing a pattern match. If mIsPrefix is true, the literal must appear
// at the start of the matched string. If it is false, the literal must
// be exactly equal to the matched string.
nsCString mPathLiteral;
bool mIsPrefix = false;
// The regular expression object which is equivalent to this glob pattern.
// Used for matching if, and only if, mPathLiteral is non-void.
RustRegex mRegExp;
};
class MatchGlob final : public nsISupports, public nsWrapperCache {
public:
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(MatchGlob)
NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(MatchGlob)
static already_AddRefed<MatchGlob> Constructor(dom::GlobalObject& aGlobal,
const nsACString& aGlob,
const nsAString& aGlob,
bool aAllowQuestion,
ErrorResult& aRv);
explicit MatchGlob(nsISupports* aParent,
already_AddRefed<MatchGlobCore> aCore)
: mParent(aParent), mCore(std::move(aCore)) {}
bool Matches(const nsAString& aString) const;
bool Matches(const nsACString& aString) const {
return Core()->Matches(aString);
}
bool IsWildcard() const { return mIsPrefix && mPathLiteral.IsEmpty(); }
bool IsWildcard() const { return Core()->IsWildcard(); }
void GetGlob(nsACString& aGlob) const { Core()->GetGlob(aGlob); }
MatchGlobCore* Core() const { return mCore; }
void GetGlob(nsAString& aGlob) const { aGlob = mGlob; }
nsISupports* GetParentObject() const { return mParent; }
virtual JSObject* WrapObject(JSContext* aCx,
JS::Handle<JSObject*> aGivenProto) override;
protected:
virtual ~MatchGlob();
private:
~MatchGlob() = default;
friend class MatchPattern;
explicit MatchGlob(nsISupports* aParent) : mParent(aParent) {}
void Init(JSContext* aCx, const nsAString& aGlob, bool aAllowQuestion,
ErrorResult& aRv);
nsCOMPtr<nsISupports> mParent;
RefPtr<MatchGlobCore> mCore;
// The original glob string that this glob object represents.
nsString mGlob;
// The literal path string to match against. If this contains a non-void
// value, the glob matches against this exact literal string, rather than
// performing a pattern match. If mIsPrefix is true, the literal must appear
// at the start of the matched string. If it is false, the literal must
// be exactly equal to the matched string.
nsString mPathLiteral;
bool mIsPrefix = false;
// The regular expression object which is equivalent to this glob pattern.
// Used for matching if, and only if, mPathLiteral is non-void.
JS::Heap<JSObject*> mRegExp;
};
class MatchGlobSet final : public CopyableTArray<RefPtr<MatchGlobCore>> {
class MatchGlobSet final : public CopyableTArray<RefPtr<MatchGlob>> {
public:
// Note: We can't use the nsTArray constructors directly, since the static
// analyzer doesn't handle their MOZ_IMPLICIT annotations correctly.
@ -100,10 +81,10 @@ class MatchGlobSet final : public CopyableTArray<RefPtr<MatchGlobCore>> {
explicit MatchGlobSet(const nsTArray& aOther) : CopyableTArray(aOther) {}
MOZ_IMPLICIT MatchGlobSet(nsTArray&& aOther)
: CopyableTArray(std::move(aOther)) {}
MOZ_IMPLICIT MatchGlobSet(std::initializer_list<RefPtr<MatchGlobCore>> aIL)
MOZ_IMPLICIT MatchGlobSet(std::initializer_list<RefPtr<MatchGlob>> aIL)
: CopyableTArray(aIL) {}
bool Matches(const nsACString& aValue) const;
bool Matches(const nsAString& aValue) const;
};
} // namespace extensions

Просмотреть файл

@ -26,34 +26,47 @@ using namespace mozilla::dom;
* AtomSet
*****************************************************************************/
template <typename Range, typename AsAtom>
static AtomSet::ArrayType AtomSetFromRange(Range&& aRange,
AsAtom&& aTransform) {
AtomSet::ArrayType atoms;
atoms.SetCapacity(RangeSize(aRange));
std::transform(aRange.begin(), aRange.end(), MakeBackInserter(atoms),
std::forward<AsAtom>(aTransform));
AtomSet::AtomSet(const nsTArray<nsString>& aElems) {
mElems.SetCapacity(aElems.Length());
atoms.Sort();
for (const auto& elem : aElems) {
mElems.AppendElement(NS_AtomizeMainThread(elem));
}
SortAndUniquify();
}
// Builds the set from an array of C strings terminated by a null pointer
// entry: each string is atomized with NS_Atomize and appended, then the
// elements are sorted and de-duplicated.
AtomSet::AtomSet(const char** aElems) {
// Walk until the terminating null entry.
for (const char** elemp = aElems; *elemp; elemp++) {
mElems.AppendElement(NS_Atomize(*elemp));
}
SortAndUniquify();
}
// Builds the set from a list of atoms: reserves room for all of them, appends
// each one, then sorts and de-duplicates the elements.
AtomSet::AtomSet(std::initializer_list<nsAtom*> aIL) {
mElems.SetCapacity(aIL.size());
for (const auto& elem : aIL) {
mElems.AppendElement(elem);
}
SortAndUniquify();
}
void AtomSet::SortAndUniquify() {
mElems.Sort();
nsAtom* prev = nullptr;
atoms.RemoveElementsBy([&prev](const RefPtr<nsAtom>& aAtom) {
mElems.RemoveElementsBy([&prev](const RefPtr<nsAtom>& aAtom) {
bool remove = aAtom == prev;
prev = aAtom;
return remove;
});
atoms.Compact();
return atoms;
mElems.Compact();
}
AtomSet::AtomSet(const nsTArray<nsString>& aElems)
: mElems(AtomSetFromRange(
aElems, [](const nsString& elem) { return NS_Atomize(elem); })) {}
AtomSet::AtomSet(std::initializer_list<nsAtom*> aIL)
: mElems(AtomSetFromRange(aIL, [](nsAtom* elem) { return elem; })) {}
bool AtomSet::Intersects(const AtomSet& aOther) const {
for (const auto& atom : *this) {
if (aOther.Contains(atom)) {
@ -68,6 +81,20 @@ bool AtomSet::Intersects(const AtomSet& aOther) const {
return false;
}
// Inserts aAtom unless it is already present.
// NOTE(review): this relies on mElems being kept sorted, and on
// IndexOfFirstElementGt returning the position of the first element ordered
// after aAtom -- confirm against nsTArray.
void AtomSet::Add(nsAtom* aAtom) {
auto index = mElems.IndexOfFirstElementGt(aAtom);
// If the element just before the insertion point is aAtom, it is already in
// the set; index == 0 means there is no such element to compare against.
if (index == 0 || mElems[index - 1] != aAtom) {
mElems.InsertElementAt(index, aAtom);
}
}
// Removes aAtom if BinaryIndexOf finds it; does nothing when the lookup
// reports NoIndex.
void AtomSet::Remove(nsAtom* aAtom) {
auto index = mElems.BinaryIndexOf(aAtom);
if (index != ArrayType::NoIndex) {
mElems.RemoveElementAt(index);
}
}
/*****************************************************************************
* URLInfo
*****************************************************************************/
@ -96,20 +123,24 @@ const nsAtom* URLInfo::HostAtom() const {
return mHostAtom;
}
const nsCString& URLInfo::FilePath() const {
const nsString& URLInfo::FilePath() const {
if (mFilePath.IsEmpty()) {
nsCString path;
nsCOMPtr<nsIURL> url = do_QueryInterface(mURI);
if (!url || NS_FAILED(url->GetFilePath(mFilePath))) {
if (url && NS_SUCCEEDED(url->GetFilePath(path))) {
AppendUTF8toUTF16(path, mFilePath);
} else {
mFilePath = Path();
}
}
return mFilePath;
}
const nsCString& URLInfo::Path() const {
const nsString& URLInfo::Path() const {
if (mPath.IsEmpty()) {
if (NS_FAILED(URINoRef()->GetPathQueryRef(mPath))) {
mPath.Truncate();
nsCString path;
if (NS_SUCCEEDED(URINoRef()->GetPathQueryRef(path))) {
AppendUTF8toUTF16(path, mPath);
}
}
return mPath;
@ -191,41 +222,41 @@ const nsCString& CookieInfo::RawHost() const {
}
/*****************************************************************************
* MatchPatternCore
* MatchPattern
*****************************************************************************/
#define DEFINE_STATIC_ATOM_SET(name, ...) \
static already_AddRefed<AtomSet> name() { \
MOZ_ASSERT(NS_IsMainThread()); \
static StaticRefPtr<AtomSet> sAtomSet; \
RefPtr<AtomSet> atomSet = sAtomSet; \
if (!atomSet) { \
atomSet = sAtomSet = new AtomSet{__VA_ARGS__}; \
ClearOnShutdown(&sAtomSet); \
} \
return atomSet.forget(); \
}
DEFINE_STATIC_ATOM_SET(PermittedSchemes, nsGkAtoms::http, nsGkAtoms::https,
nsGkAtoms::ws, nsGkAtoms::wss, nsGkAtoms::file,
nsGkAtoms::ftp, nsGkAtoms::data);
const char* PERMITTED_SCHEMES[] = {"http", "https", "ws", "wss",
"file", "ftp", "data", nullptr};
// Known schemes that are followed by "://" instead of ":".
DEFINE_STATIC_ATOM_SET(HostLocatorSchemes, nsGkAtoms::http, nsGkAtoms::https,
nsGkAtoms::ws, nsGkAtoms::wss, nsGkAtoms::file,
nsGkAtoms::ftp, nsGkAtoms::moz_extension,
nsGkAtoms::chrome, nsGkAtoms::resource, nsGkAtoms::moz,
nsGkAtoms::moz_icon, nsGkAtoms::moz_gio);
const char* HOST_LOCATOR_SCHEMES[] = {
"http", "https", "ws", "wss", "file", "ftp", "moz-extension",
"chrome", "resource", "moz", "moz-icon", "moz-gio", nullptr};
DEFINE_STATIC_ATOM_SET(WildcardSchemes, nsGkAtoms::http, nsGkAtoms::https,
nsGkAtoms::ws, nsGkAtoms::wss);
const char* WILDCARD_SCHEMES[] = {"http", "https", "ws", "wss", nullptr};
#undef DEFINE_STATIC_ATOM_SET
/* static */
// Creates a MatchPattern parented to aGlobal and initializes it from
// aPattern, forwarding the mIgnorePath and mRestrictSchemes members of
// aOptions to Init. Returns nullptr when Init leaves aRv in a failed state,
// otherwise the new pattern.
already_AddRefed<MatchPattern> MatchPattern::Constructor(
dom::GlobalObject& aGlobal, const nsAString& aPattern,
const MatchPatternOptions& aOptions, ErrorResult& aRv) {
RefPtr<MatchPattern> pattern = new MatchPattern(aGlobal.GetAsSupports());
pattern->Init(aGlobal.Context(), aPattern, aOptions.mIgnorePath,
aOptions.mRestrictSchemes, aRv);
if (aRv.Failed()) {
return nullptr;
}
return pattern.forget();
}
MatchPatternCore::MatchPatternCore(const nsAString& aPattern, bool aIgnorePath,
bool aRestrictSchemes, ErrorResult& aRv) {
MOZ_ASSERT(NS_IsMainThread());
RefPtr<AtomSet> permittedSchemes = PermittedSchemes();
void MatchPattern::Init(JSContext* aCx, const nsAString& aPattern,
bool aIgnorePath, bool aRestrictSchemes,
ErrorResult& aRv) {
RefPtr<AtomSet> permittedSchemes;
nsresult rv = AtomSet::Get<PERMITTED_SCHEMES>(permittedSchemes);
if (NS_FAILED(rv)) {
aRv.Throw(rv);
return;
}
mPattern = aPattern;
@ -251,10 +282,19 @@ MatchPatternCore::MatchPatternCore(const nsAString& aPattern, bool aIgnorePath,
RefPtr<nsAtom> scheme = NS_AtomizeMainThread(StringHead(aPattern, index));
bool requireHostLocatorScheme = true;
if (scheme == nsGkAtoms::_asterisk) {
mSchemes = WildcardSchemes();
rv = AtomSet::Get<WILDCARD_SCHEMES>(mSchemes);
if (NS_FAILED(rv)) {
aRv.Throw(rv);
return;
}
} else if (!aRestrictSchemes || permittedSchemes->Contains(scheme) ||
scheme == nsGkAtoms::moz_extension) {
RefPtr<AtomSet> hostLocatorSchemes = HostLocatorSchemes();
RefPtr<AtomSet> hostLocatorSchemes;
rv = AtomSet::Get<HOST_LOCATOR_SCHEMES>(hostLocatorSchemes);
if (NS_FAILED(rv)) {
aRv.Throw(rv);
return;
}
mSchemes = new AtomSet({scheme});
requireHostLocatorScheme = hostLocatorSchemes->Contains(scheme);
} else {
@ -319,16 +359,17 @@ MatchPatternCore::MatchPatternCore(const nsAString& aPattern, bool aIgnorePath,
return;
}
NS_ConvertUTF16toUTF8 path(tail);
auto path = tail;
if (path.IsEmpty()) {
aRv.Throw(NS_ERROR_INVALID_ARG);
return;
}
mPath = new MatchGlobCore(path, false, aRv);
mPath = new MatchGlob(this);
mPath->Init(aCx, path, false, aRv);
}
bool MatchPatternCore::MatchesDomain(const nsACString& aDomain) const {
bool MatchPattern::MatchesDomain(const nsACString& aDomain) const {
if (DomainIsWildcard() || mDomain == aDomain) {
return true;
}
@ -344,8 +385,8 @@ bool MatchPatternCore::MatchesDomain(const nsACString& aDomain) const {
return false;
}
bool MatchPatternCore::Matches(const nsAString& aURL, bool aExplicit,
ErrorResult& aRv) const {
bool MatchPattern::Matches(const nsAString& aURL, bool aExplicit,
ErrorResult& aRv) const {
nsCOMPtr<nsIURI> uri;
nsresult rv = NS_NewURI(getter_AddRefs(uri), aURL);
if (NS_FAILED(rv)) {
@ -356,7 +397,7 @@ bool MatchPatternCore::Matches(const nsAString& aURL, bool aExplicit,
return Matches(uri.get(), aExplicit);
}
bool MatchPatternCore::Matches(const URLInfo& aURL, bool aExplicit) const {
bool MatchPattern::Matches(const URLInfo& aURL, bool aExplicit) const {
if (aExplicit && mMatchSubdomain) {
return false;
}
@ -376,7 +417,7 @@ bool MatchPatternCore::Matches(const URLInfo& aURL, bool aExplicit) const {
return true;
}
bool MatchPatternCore::MatchesCookie(const CookieInfo& aCookie) const {
bool MatchPattern::MatchesCookie(const CookieInfo& aCookie) const {
if (!mSchemes->Contains(nsGkAtoms::https) &&
(aCookie.IsSecure() || !mSchemes->Contains(nsGkAtoms::http))) {
return false;
@ -404,7 +445,7 @@ bool MatchPatternCore::MatchesCookie(const CookieInfo& aCookie) const {
return StringTail(mDomain, host.Length()) == host;
}
bool MatchPatternCore::SubsumesDomain(const MatchPatternCore& aPattern) const {
bool MatchPattern::SubsumesDomain(const MatchPattern& aPattern) const {
if (!mMatchSubdomain && aPattern.mMatchSubdomain &&
aPattern.mDomain == mDomain) {
return false;
@ -413,7 +454,7 @@ bool MatchPatternCore::SubsumesDomain(const MatchPatternCore& aPattern) const {
return MatchesDomain(aPattern.mDomain);
}
bool MatchPatternCore::Subsumes(const MatchPatternCore& aPattern) const {
bool MatchPattern::Subsumes(const MatchPattern& aPattern) const {
for (auto& scheme : *aPattern.mSchemes) {
if (!mSchemes->Contains(scheme)) {
return false;
@ -423,7 +464,7 @@ bool MatchPatternCore::Subsumes(const MatchPatternCore& aPattern) const {
return SubsumesDomain(aPattern);
}
bool MatchPatternCore::Overlaps(const MatchPatternCore& aPattern) const {
bool MatchPattern::Overlaps(const MatchPattern& aPattern) const {
if (!mSchemes->Intersects(*aPattern.mSchemes)) {
return false;
}
@ -431,24 +472,6 @@ bool MatchPatternCore::Overlaps(const MatchPatternCore& aPattern) const {
return SubsumesDomain(aPattern) || aPattern.SubsumesDomain(*this);
}
/*****************************************************************************
* MatchPattern
*****************************************************************************/
// Builds the MatchPatternCore for aPattern and wraps it in a MatchPattern
// parented to aGlobal's supports object. Returns null when constructing the
// core reported a failure through aRv.
/* static */
already_AddRefed<MatchPattern> MatchPattern::Constructor(
    dom::GlobalObject& aGlobal, const nsAString& aPattern,
    const MatchPatternOptions& aOptions, ErrorResult& aRv) {
  RefPtr<MatchPatternCore> core = MakeAndAddRef<MatchPatternCore>(
      aPattern, aOptions.mIgnorePath, aOptions.mRestrictSchemes, aRv);

  RefPtr<MatchPattern> result =
      new MatchPattern(aGlobal.GetAsSupports(), core.forget());

  if (!aRv.Failed()) {
    return result.forget();
  }
  return nullptr;
}
JSObject* MatchPattern::WrapObject(JSContext* aCx,
JS::Handle<JSObject*> aGivenProto) {
return MatchPattern_Binding::Wrap(aCx, this, aGivenProto);
@ -456,11 +479,16 @@ JSObject* MatchPattern::WrapObject(JSContext* aCx,
// Returns true when aURL's scheme is a member of the PERMITTED_SCHEMES atom
// set. If that set cannot be obtained (AtomSet::Get returns a failure code),
// a warning is emitted and the URL is treated as not matching.
/* static */
bool MatchPattern::MatchesAllURLs(const URLInfo& aURL) {
  RefPtr<AtomSet> permittedSchemes;
  nsresult rv = AtomSet::Get<PERMITTED_SCHEMES>(permittedSchemes);
  if (NS_FAILED(rv)) {
    NS_WARNING("Failed to retrieve PERMITTED_SCHEMES AtomSet");
    return false;
  }
  return permittedSchemes->Contains(aURL.Scheme());
}
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(MatchPattern, mParent)
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(MatchPattern, mPath, mParent)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(MatchPattern)
NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
@ -470,84 +498,6 @@ NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTING_ADDREF(MatchPattern)
NS_IMPL_CYCLE_COLLECTING_RELEASE(MatchPattern)
bool MatchPatternSetCore::Matches(const nsAString& aURL, bool aExplicit,
ErrorResult& aRv) const {
nsCOMPtr<nsIURI> uri;
nsresult rv = NS_NewURI(getter_AddRefs(uri), aURL);
if (NS_FAILED(rv)) {
aRv.Throw(rv);
return false;
}
return Matches(uri.get(), aExplicit);
}
bool MatchPatternSetCore::Matches(const URLInfo& aURL, bool aExplicit) const {
for (const auto& pattern : mPatterns) {
if (pattern->Matches(aURL, aExplicit)) {
return true;
}
}
return false;
}
bool MatchPatternSetCore::MatchesCookie(const CookieInfo& aCookie) const {
for (const auto& pattern : mPatterns) {
if (pattern->MatchesCookie(aCookie)) {
return true;
}
}
return false;
}
// Returns true when at least one pattern in the set subsumes aPattern.
bool MatchPatternSetCore::Subsumes(const MatchPatternCore& aPattern) const {
  for (const auto& candidate : mPatterns) {
    const bool subsumed = candidate->Subsumes(aPattern);
    if (subsumed) {
      return true;
    }
  }
  return false;
}
// Returns true when at least one pattern in the set subsumes aPattern's
// domain.
bool MatchPatternSetCore::SubsumesDomain(
    const MatchPatternCore& aPattern) const {
  for (const auto& candidate : mPatterns) {
    const bool subsumed = candidate->SubsumesDomain(aPattern);
    if (subsumed) {
      return true;
    }
  }
  return false;
}
// Set-vs-set overlap: true when this set overlaps at least one of the
// patterns contained in aPatternSet.
bool MatchPatternSetCore::Overlaps(
    const MatchPatternSetCore& aPatternSet) const {
  for (const auto& other : aPatternSet.mPatterns) {
    // Delegates to the single-pattern overload.
    const bool overlapping = Overlaps(*other);
    if (overlapping) {
      return true;
    }
  }
  return false;
}
// Set-vs-pattern overlap: true when any pattern in this set overlaps
// aPattern.
bool MatchPatternSetCore::Overlaps(const MatchPatternCore& aPattern) const {
  for (const auto& candidate : mPatterns) {
    const bool overlapping = candidate->Overlaps(aPattern);
    if (overlapping) {
      return true;
    }
  }
  return false;
}
// True only when aPatternSet is non-empty and this set overlaps every one of
// its patterns.
bool MatchPatternSetCore::OverlapsAll(
    const MatchPatternSetCore& aPatternSet) const {
  const auto& others = aPatternSet.mPatterns;
  for (const auto& other : others) {
    const bool overlapping = Overlaps(*other);
    if (!overlapping) {
      return false;
    }
  }
  // An empty set has nothing to overlap, so it is reported as false.
  return others.Length() > 0;
}
/*****************************************************************************
* MatchPatternSet
*****************************************************************************/
@ -557,37 +507,100 @@ already_AddRefed<MatchPatternSet> MatchPatternSet::Constructor(
dom::GlobalObject& aGlobal,
const nsTArray<dom::OwningStringOrMatchPattern>& aPatterns,
const MatchPatternOptions& aOptions, ErrorResult& aRv) {
MatchPatternSetCore::ArrayType patterns;
ArrayType patterns;
for (auto& elem : aPatterns) {
if (elem.IsMatchPattern()) {
patterns.AppendElement(elem.GetAsMatchPattern()->Core());
patterns.AppendElement(elem.GetAsMatchPattern());
} else {
RefPtr<MatchPatternCore> pattern =
new MatchPatternCore(elem.GetAsString(), aOptions.mIgnorePath,
aOptions.mRestrictSchemes, aRv);
RefPtr<MatchPattern> pattern =
MatchPattern::Constructor(aGlobal, elem.GetAsString(), aOptions, aRv);
if (aRv.Failed()) {
if (!pattern) {
return nullptr;
}
patterns.AppendElement(std::move(pattern));
}
}
RefPtr<MatchPatternSet> patternSet = new MatchPatternSet(
aGlobal.GetAsSupports(),
do_AddRef(new MatchPatternSetCore(std::move(patterns))));
RefPtr<MatchPatternSet> patternSet =
new MatchPatternSet(aGlobal.GetAsSupports(), std::move(patterns));
return patternSet.forget();
}
void MatchPatternSet::GetPatterns(ArrayType& aPatterns) {
if (!mPatternsCache) {
mPatternsCache.emplace(Core()->mPatterns.Length());
for (auto& elem : Core()->mPatterns) {
mPatternsCache->AppendElement(new MatchPattern(this, do_AddRef(elem)));
bool MatchPatternSet::Matches(const nsAString& aURL, bool aExplicit,
ErrorResult& aRv) const {
nsCOMPtr<nsIURI> uri;
nsresult rv = NS_NewURI(getter_AddRefs(uri), aURL);
if (NS_FAILED(rv)) {
aRv.Throw(rv);
return false;
}
return Matches(uri.get(), aExplicit);
}
bool MatchPatternSet::Matches(const URLInfo& aURL, bool aExplicit) const {
for (const auto& pattern : mPatterns) {
if (pattern->Matches(aURL, aExplicit)) {
return true;
}
}
aPatterns.AppendElements(*mPatternsCache);
return false;
}
bool MatchPatternSet::MatchesCookie(const CookieInfo& aCookie) const {
for (const auto& pattern : mPatterns) {
if (pattern->MatchesCookie(aCookie)) {
return true;
}
}
return false;
}
// Returns true when at least one pattern in the set subsumes aPattern.
bool MatchPatternSet::Subsumes(const MatchPattern& aPattern) const {
  for (const auto& member : mPatterns) {
    if (!member->Subsumes(aPattern)) {
      continue;
    }
    return true;
  }
  return false;
}
// Returns true when at least one pattern in the set subsumes aPattern's
// domain.
bool MatchPatternSet::SubsumesDomain(const MatchPattern& aPattern) const {
  for (const auto& member : mPatterns) {
    if (!member->SubsumesDomain(aPattern)) {
      continue;
    }
    return true;
  }
  return false;
}
// Set-vs-set overlap: true when this set overlaps at least one of the
// patterns contained in aPatternSet.
bool MatchPatternSet::Overlaps(const MatchPatternSet& aPatternSet) const {
  for (const auto& theirs : aPatternSet.mPatterns) {
    // Delegates to the single-pattern overload.
    if (!Overlaps(*theirs)) {
      continue;
    }
    return true;
  }
  return false;
}
// Set-vs-pattern overlap: true when any pattern in this set overlaps
// aPattern.
bool MatchPatternSet::Overlaps(const MatchPattern& aPattern) const {
  for (const auto& member : mPatterns) {
    if (!member->Overlaps(aPattern)) {
      continue;
    }
    return true;
  }
  return false;
}
// True only when aPatternSet is non-empty and this set overlaps every one of
// its patterns.
bool MatchPatternSet::OverlapsAll(const MatchPatternSet& aPatternSet) const {
  const auto& theirs = aPatternSet.mPatterns;
  for (const auto& pattern : theirs) {
    if (Overlaps(*pattern)) {
      continue;
    }
    return false;
  }
  // An empty set has nothing to overlap, so it is reported as false.
  return theirs.Length() > 0;
}
JSObject* MatchPatternSet::WrapObject(JSContext* aCx,
@ -595,7 +608,7 @@ JSObject* MatchPatternSet::WrapObject(JSContext* aCx,
return MatchPatternSet_Binding::Wrap(aCx, this, aGivenProto);
}
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(MatchPatternSet, mPatternsCache, mParent)
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(MatchPatternSet, mPatterns, mParent)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(MatchPatternSet)
NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
@ -606,14 +619,30 @@ NS_IMPL_CYCLE_COLLECTING_ADDREF(MatchPatternSet)
NS_IMPL_CYCLE_COLLECTING_RELEASE(MatchPatternSet)
/*****************************************************************************
* MatchGlobCore
* MatchGlob
*****************************************************************************/
MatchGlobCore::MatchGlobCore(const nsACString& aGlob, bool aAllowQuestion,
ErrorResult& aRv)
: mGlob(aGlob) {
// Drops this object's JS object registration on destruction.
MatchGlob::~MatchGlob() {
  mozilla::DropJSObjects(this);
}
// Creates a MatchGlob parented to aGlobal's supports object and initializes
// it from aGlob. Returns null when initialization reported a failure through
// aRv.
/* static */
already_AddRefed<MatchGlob> MatchGlob::Constructor(dom::GlobalObject& aGlobal,
                                                   const nsAString& aGlob,
                                                   bool aAllowQuestion,
                                                   ErrorResult& aRv) {
  RefPtr<MatchGlob> result = new MatchGlob(aGlobal.GetAsSupports());

  result->Init(aGlobal.Context(), aGlob, aAllowQuestion, aRv);

  if (!aRv.Failed()) {
    return result.forget();
  }
  return nullptr;
}
void MatchGlob::Init(JSContext* aCx, const nsAString& aGlob,
bool aAllowQuestion, ErrorResult& aRv) {
mGlob = aGlob;
// Check for a literal match with no glob metacharacters.
auto index = mGlob.FindCharInSet(aAllowQuestion ? "*?" : "*");
auto index = mGlob.FindCharInSet(aAllowQuestion ? u"*?" : u"*");
if (index < 0) {
mPathLiteral = mGlob;
return;
@ -630,7 +659,7 @@ MatchGlobCore::MatchGlobCore(const nsACString& aGlob, bool aAllowQuestion,
// Fall back to the regexp slow path.
constexpr auto metaChars = ".+*?^${}()|[]\\"_ns;
nsAutoCString escaped;
nsAutoString escaped;
escaped.Append('^');
// For any continuous string of * (and ? if aAllowQuestion) wildcards, only
@ -659,15 +688,37 @@ MatchGlobCore::MatchGlobCore(const nsACString& aGlob, bool aAllowQuestion,
escaped.Append('$');
mRegExp = RustRegex(escaped);
if (!mRegExp) {
aRv.ThrowTypeError("failed to compile regex for glob");
// TODO: Switch to the Rust regexp crate, when Rust integration is easier.
// It uses a much more efficient, linear time matching algorithm, and
// doesn't require special casing for the literal and prefix cases.
mRegExp = JS::NewUCRegExpObject(aCx, escaped.get(), escaped.Length(), 0);
if (mRegExp) {
mozilla::HoldJSObjects(this);
} else {
aRv.NoteJSContextException(aCx);
}
}
bool MatchGlobCore::Matches(const nsACString& aString) const {
bool MatchGlob::Matches(const nsAString& aString) const {
if (mRegExp) {
return mRegExp.IsMatch(aString);
AutoJSAPI jsapi;
jsapi.Init();
JSContext* cx = jsapi.cx();
JSAutoRealm ar(cx, mRegExp);
JS::Rooted<JSObject*> regexp(cx, mRegExp);
JS::Rooted<JS::Value> result(cx);
nsString input(aString);
size_t index = 0;
if (!JS::ExecuteRegExpNoStatics(cx, regexp, input.BeginWriting(),
aString.Length(), &index, true, &result)) {
return false;
}
return result.isBoolean() && result.toBoolean();
}
if (mIsPrefix) {
@ -677,30 +728,27 @@ bool MatchGlobCore::Matches(const nsACString& aString) const {
return mPathLiteral == aString;
}
/*****************************************************************************
* MatchGlob
*****************************************************************************/
// Builds the MatchGlobCore for aGlob and wraps it in a MatchGlob parented to
// aGlobal's supports object. Returns null when constructing the core reported
// a failure through aRv.
/* static */
already_AddRefed<MatchGlob> MatchGlob::Constructor(dom::GlobalObject& aGlobal,
                                                   const nsACString& aGlob,
                                                   bool aAllowQuestion,
                                                   ErrorResult& aRv) {
  RefPtr<MatchGlobCore> core =
      MakeAndAddRef<MatchGlobCore>(aGlob, aAllowQuestion, aRv);

  RefPtr<MatchGlob> result =
      new MatchGlob(aGlobal.GetAsSupports(), core.forget());

  if (!aRv.Failed()) {
    return result.forget();
  }
  return nullptr;
}
// Produces the JS reflector for this object by delegating to the generated
// MatchGlob binding.
JSObject* MatchGlob::WrapObject(JSContext* aCx,
                                JS::Handle<JSObject*> aGivenProto) {
  JSObject* reflector = MatchGlob_Binding::Wrap(aCx, this, aGivenProto);
  return reflector;
}
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(MatchGlob, mParent)
NS_IMPL_CYCLE_COLLECTION_CLASS(MatchGlob)
NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(MatchGlob)
NS_IMPL_CYCLE_COLLECTION_UNLINK_PRESERVED_WRAPPER
NS_IMPL_CYCLE_COLLECTION_UNLINK(mParent)
tmp->mRegExp = nullptr;
NS_IMPL_CYCLE_COLLECTION_UNLINK_END
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(MatchGlob)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mParent)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
NS_IMPL_CYCLE_COLLECTION_TRACE_BEGIN(MatchGlob)
NS_IMPL_CYCLE_COLLECTION_TRACE_PRESERVED_WRAPPER
NS_IMPL_CYCLE_COLLECTION_TRACE_JS_MEMBER_CALLBACK(mRegExp)
NS_IMPL_CYCLE_COLLECTION_TRACE_END
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(MatchGlob)
NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
@ -714,7 +762,7 @@ NS_IMPL_CYCLE_COLLECTING_RELEASE(MatchGlob)
* MatchGlobSet
*****************************************************************************/
bool MatchGlobSet::Matches(const nsACString& aValue) const {
bool MatchGlobSet::Matches(const nsAString& aValue) const {
for (auto& glob : *this) {
if (glob->Matches(aValue)) {
return true;

Просмотреть файл

@ -32,20 +32,22 @@ namespace extensions {
using dom::MatchPatternOptions;
// A sorted, immutable, binary-search-backed set of atoms, optimized for
// frequent lookups.
class AtomSet final {
public:
// A sorted, binary-search-backed set of atoms, optimized for frequent lookups
// and infrequent updates.
class AtomSet final : public RefCounted<AtomSet> {
using ArrayType = AutoTArray<RefPtr<nsAtom>, 1>;
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AtomSet)
public:
MOZ_DECLARE_REFCOUNTED_TYPENAME(AtomSet)
explicit AtomSet(const nsTArray<nsString>& aElems);
explicit AtomSet(const char** aElems);
MOZ_IMPLICIT AtomSet(std::initializer_list<nsAtom*> aIL);
bool Contains(const nsAString& elem) const {
RefPtr<nsAtom> atom = NS_Atomize(elem);
RefPtr<nsAtom> atom = NS_AtomizeMainThread(elem);
return Contains(atom);
}
@ -60,6 +62,45 @@ class AtomSet final {
bool Intersects(const AtomSet& aOther) const;
void Add(nsAtom* aElem);
void Remove(nsAtom* aElem);
void Add(const nsAString& aElem) {
RefPtr<nsAtom> atom = NS_AtomizeMainThread(aElem);
return Add(atom);
}
void Remove(const nsAString& aElem) {
RefPtr<nsAtom> atom = NS_AtomizeMainThread(aElem);
return Remove(atom);
}
// Returns a cached, statically-allocated matcher for the given set of
// literal strings.
template <const char** schemes>
[[nodiscard]] static nsresult Get(RefPtr<AtomSet>& aMatcherOut) {
static RefPtr<AtomSet> sMatcher;
if (MOZ_UNLIKELY(!sMatcher)) {
// If this static method is called late during the shutdown,
// ClearOnShutdown would be destroying the instance before the
// RefPtr gets to the caller, let's make sure that this method
// signature does make it more clear by returning an explicit
// not discardable nsresult.
if (PastShutdownPhase(ShutdownPhase::XPCOMShutdownFinal)) {
aMatcherOut = nullptr;
return NS_ERROR_ILLEGAL_DURING_SHUTDOWN;
}
sMatcher = new AtomSet(schemes);
ClearOnShutdown(&sMatcher);
}
MOZ_ASSERT(sMatcher);
aMatcherOut = do_AddRef(sMatcher);
return NS_OK;
}
void Get(nsTArray<nsString>& aResult) const {
aResult.SetCapacity(mElems.Length());
@ -77,9 +118,9 @@ class AtomSet final {
}
private:
~AtomSet() = default;
ArrayType mElems;
const ArrayType mElems;
void SortAndUniquify();
};
// A helper class to lazily retrieve, transcode, and atomize certain URI
@ -102,8 +143,8 @@ class URLInfo final {
nsAtom* Scheme() const;
const nsCString& Host() const;
const nsAtom* HostAtom() const;
const nsCString& Path() const;
const nsCString& FilePath() const;
const nsString& Path() const;
const nsString& FilePath() const;
const nsString& Spec() const;
const nsCString& CSpec() const;
@ -119,8 +160,8 @@ class URLInfo final {
mutable nsCString mHost;
mutable RefPtr<nsAtom> mHostAtom;
mutable nsCString mPath;
mutable nsCString mFilePath;
mutable nsString mPath;
mutable nsString mFilePath;
mutable nsString mSpec;
mutable nsCString mCSpec;
@ -148,34 +189,51 @@ class MOZ_STACK_CLASS CookieInfo final {
mutable nsCString mRawHost;
};
class MatchPatternCore final {
public:
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MatchPatternCore)
class MatchPattern final : public nsISupports, public nsWrapperCache {
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(MatchPattern)
// NOTE: Must be constructed on the main thread!
MatchPatternCore(const nsAString& aPattern, bool aIgnorePath,
bool aRestrictSchemes, ErrorResult& aRv);
static already_AddRefed<MatchPattern> Constructor(
dom::GlobalObject& aGlobal, const nsAString& aPattern,
const MatchPatternOptions& aOptions, ErrorResult& aRv);
bool Matches(const nsAString& aURL, bool aExplicit, ErrorResult& aRv) const;
bool Matches(const URLInfo& aURL, bool aExplicit = false) const;
bool Matches(const URLInfo& aURL, bool aExplicit, ErrorResult& aRv) const {
return Matches(aURL, aExplicit);
}
bool MatchesCookie(const CookieInfo& aCookie) const;
bool MatchesDomain(const nsACString& aDomain) const;
bool Subsumes(const MatchPatternCore& aPattern) const;
bool Subsumes(const MatchPattern& aPattern) const;
bool SubsumesDomain(const MatchPatternCore& aPattern) const;
bool SubsumesDomain(const MatchPattern& aPattern) const;
bool Overlaps(const MatchPatternCore& aPattern) const;
bool Overlaps(const MatchPattern& aPattern) const;
bool DomainIsWildcard() const { return mMatchSubdomain && mDomain.IsEmpty(); }
void GetPattern(nsAString& aPattern) const { aPattern = mPattern; }
nsISupports* GetParentObject() const { return mParent; }
virtual JSObject* WrapObject(JSContext* aCx,
JS::Handle<JSObject*> aGivenProto) override;
protected:
virtual ~MatchPattern() = default;
private:
~MatchPatternCore() = default;
explicit MatchPattern(nsISupports* aParent) : mParent(aParent) {}
void Init(JSContext* aCx, const nsAString& aPattern, bool aIgnorePath,
bool aRestrictSchemes, ErrorResult& aRv);
nsCOMPtr<nsISupports> mParent;
// The normalized match pattern string that this object represents.
nsString mPattern;
@ -197,76 +255,7 @@ class MatchPatternCore final {
// The glob against which the URL path must match. If null, the path is
// ignored entirely. If non-null, the path must match this glob.
RefPtr<MatchGlobCore> mPath;
};
class MatchPattern final : public nsISupports, public nsWrapperCache {
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(MatchPattern)
static already_AddRefed<MatchPattern> Constructor(
dom::GlobalObject& aGlobal, const nsAString& aPattern,
const MatchPatternOptions& aOptions, ErrorResult& aRv);
bool Matches(const nsAString& aURL, bool aExplicit, ErrorResult& aRv) const {
return Core()->Matches(aURL, aExplicit, aRv);
}
bool Matches(const URLInfo& aURL, bool aExplicit = false) const {
return Core()->Matches(aURL, aExplicit);
}
bool Matches(const URLInfo& aURL, bool aExplicit, ErrorResult& aRv) const {
return Matches(aURL, aExplicit);
}
bool MatchesCookie(const CookieInfo& aCookie) const {
return Core()->MatchesCookie(aCookie);
}
bool MatchesDomain(const nsACString& aDomain) const {
return Core()->MatchesDomain(aDomain);
}
bool Subsumes(const MatchPattern& aPattern) const {
return Core()->Subsumes(*aPattern.Core());
}
bool SubsumesDomain(const MatchPattern& aPattern) const {
return Core()->SubsumesDomain(*aPattern.Core());
}
bool Overlaps(const MatchPattern& aPattern) const {
return Core()->Overlaps(*aPattern.Core());
}
bool DomainIsWildcard() const { return Core()->DomainIsWildcard(); }
void GetPattern(nsAString& aPattern) const { Core()->GetPattern(aPattern); }
MatchPatternCore* Core() const { return mCore; }
nsISupports* GetParentObject() const { return mParent; }
virtual JSObject* WrapObject(JSContext* aCx,
JS::Handle<JSObject*> aGivenProto) override;
protected:
virtual ~MatchPattern() = default;
private:
friend class MatchPatternSet;
explicit MatchPattern(nsISupports* aParent,
already_AddRefed<MatchPatternCore> aCore)
: mParent(aParent), mCore(std::move(aCore)) {}
void Init(JSContext* aCx, const nsAString& aPattern, bool aIgnorePath,
bool aRestrictSchemes, ErrorResult& aRv);
nsCOMPtr<nsISupports> mParent;
RefPtr<MatchPatternCore> mCore;
RefPtr<MatchGlob> mPath;
public:
// A quick way to check if a particular URL matches <all_urls> without
@ -274,48 +263,6 @@ class MatchPattern final : public nsISupports, public nsWrapperCache {
static bool MatchesAllURLs(const URLInfo& aURL);
};
class MatchPatternSetCore final {
public:
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MatchPatternSetCore)
using ArrayType = nsTArray<RefPtr<MatchPatternCore>>;
explicit MatchPatternSetCore(ArrayType&& aPatterns)
: mPatterns(std::move(aPatterns)) {}
static already_AddRefed<MatchPatternSet> Constructor(
dom::GlobalObject& aGlobal,
const nsTArray<dom::OwningStringOrMatchPattern>& aPatterns,
const MatchPatternOptions& aOptions, ErrorResult& aRv);
bool Matches(const nsAString& aURL, bool aExplicit, ErrorResult& aRv) const;
bool Matches(const URLInfo& aURL, bool aExplicit = false) const;
bool MatchesCookie(const CookieInfo& aCookie) const;
bool Subsumes(const MatchPatternCore& aPattern) const;
bool SubsumesDomain(const MatchPatternCore& aPattern) const;
bool Overlaps(const MatchPatternCore& aPattern) const;
bool Overlaps(const MatchPatternSetCore& aPatternSet) const;
bool OverlapsAll(const MatchPatternSetCore& aPatternSet) const;
void GetPatterns(ArrayType& aPatterns) {
aPatterns.AppendElements(mPatterns);
}
private:
friend class MatchPatternSet;
~MatchPatternSetCore() = default;
ArrayType mPatterns;
};
class MatchPatternSet final : public nsISupports, public nsWrapperCache {
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(MatchPatternSet)
@ -327,46 +274,30 @@ class MatchPatternSet final : public nsISupports, public nsWrapperCache {
const nsTArray<dom::OwningStringOrMatchPattern>& aPatterns,
const MatchPatternOptions& aOptions, ErrorResult& aRv);
bool Matches(const nsAString& aURL, bool aExplicit, ErrorResult& aRv) const {
return Core()->Matches(aURL, aExplicit, aRv);
}
bool Matches(const nsAString& aURL, bool aExplicit, ErrorResult& aRv) const;
bool Matches(const URLInfo& aURL, bool aExplicit = false) const {
return Core()->Matches(aURL, aExplicit);
}
bool Matches(const URLInfo& aURL, bool aExplicit = false) const;
bool Matches(const URLInfo& aURL, bool aExplicit, ErrorResult& aRv) const {
return Matches(aURL, aExplicit);
}
bool MatchesCookie(const CookieInfo& aCookie) const {
return Core()->MatchesCookie(aCookie);
bool MatchesCookie(const CookieInfo& aCookie) const;
bool Subsumes(const MatchPattern& aPattern) const;
bool SubsumesDomain(const MatchPattern& aPattern) const;
bool Overlaps(const MatchPattern& aPattern) const;
bool Overlaps(const MatchPatternSet& aPatternSet) const;
bool OverlapsAll(const MatchPatternSet& aPatternSet) const;
void GetPatterns(ArrayType& aPatterns) {
aPatterns.AppendElements(mPatterns);
}
bool Subsumes(const MatchPattern& aPattern) const {
return Core()->Subsumes(*aPattern.Core());
}
bool SubsumesDomain(const MatchPattern& aPattern) const {
return Core()->SubsumesDomain(*aPattern.Core());
}
bool Overlaps(const MatchPattern& aPattern) const {
return Core()->Overlaps(*aPattern.Core());
}
bool Overlaps(const MatchPatternSet& aPatternSet) const {
return Core()->Overlaps(*aPatternSet.Core());
}
bool OverlapsAll(const MatchPatternSet& aPatternSet) const {
return Core()->OverlapsAll(*aPatternSet.Core());
}
void GetPatterns(ArrayType& aPatterns);
MatchPatternSetCore* Core() const { return mCore; }
nsISupports* GetParentObject() const { return mParent; }
virtual JSObject* WrapObject(JSContext* aCx,
@ -376,15 +307,12 @@ class MatchPatternSet final : public nsISupports, public nsWrapperCache {
virtual ~MatchPatternSet() = default;
private:
explicit MatchPatternSet(nsISupports* aParent,
already_AddRefed<MatchPatternSetCore> aCore)
: mParent(aParent), mCore(std::move(aCore)) {}
explicit MatchPatternSet(nsISupports* aParent, ArrayType&& aPatterns)
: mParent(aParent), mPatterns(std::forward<ArrayType>(aPatterns)) {}
nsCOMPtr<nsISupports> mParent;
RefPtr<MatchPatternSetCore> mCore;
mozilla::Maybe<ArrayType> mPatternsCache;
ArrayType mPatterns;
};
} // namespace extensions

Просмотреть файл

@ -137,6 +137,13 @@ class MozDocumentMatcher : public nsISupports, public nsWrapperCache {
MatchPatternSet* GetExcludeMatches() { return mExcludeMatches; }
const MatchPatternSet* GetExcludeMatches() const { return mExcludeMatches; }
// Copies this matcher's include globs into aGlobs, or sets aGlobs to null
// when no include globs are present.
//
// The previous body passed mExcludeGlobs, so this getter returned the exclude
// globs instead of the include globs.
// NOTE(review): mIncludeGlobs is declared outside the visible part of this
// class — confirm the member name.
void GetIncludeGlobs(Nullable<MatchGlobArray>& aGlobs) {
  ToNullable(mIncludeGlobs, aGlobs);
}
// Copies this matcher's exclude globs into aGlobs, or sets aGlobs to null
// when no exclude globs are present.
void GetExcludeGlobs(Nullable<MatchGlobArray>& aGlobs) {
  const auto& source = mExcludeGlobs;
  ToNullable(source, aGlobs);
}
Nullable<uint64_t> GetFrameID() const { return mFrameID; }
void GetOriginAttributesPatterns(JSContext* aCx,
@ -172,6 +179,25 @@ class MozDocumentMatcher : public nsISupports, public nsWrapperCache {
Nullable<uint64_t> mFrameID;
bool mMatchAboutBlank;
Nullable<dom::Sequence<OriginAttributesPattern>> mOriginAttributesPatterns;
private:
// Transfers a nullable value into a nullable of a (possibly different) type:
// null stays null, otherwise aOutput is set from aInput's value.
template <typename T, typename U>
void ToNullable(const Nullable<T>& aInput, Nullable<U>& aOutput) {
  if (!aInput.IsNull()) {
    aOutput.SetValue(aInput.Value());
    return;
  }
  aOutput.SetNull();
}
// Array overload: null stays null, otherwise aOutput is set from a clone of
// aInput's value.
template <typename T, typename U>
void ToNullable(const Nullable<T>& aInput, Nullable<nsTArray<U>>& aOutput) {
  if (!aInput.IsNull()) {
    aOutput.SetValue(aInput.Value().Clone());
    return;
  }
  aOutput.SetNull();
}
};
class WebExtensionContentScript final : public MozDocumentMatcher {

Просмотреть файл

@ -3,7 +3,6 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "MainThreadUtils.h"
#include "mozilla/ExtensionPolicyService.h"
#include "mozilla/extensions/DocumentObserver.h"
#include "mozilla/extensions/WebExtensionContentScript.h"
@ -19,7 +18,6 @@
#include "nsGlobalWindowInner.h"
#include "nsIObserver.h"
#include "nsISubstitutingProtocolHandler.h"
#include "nsLiteralString.h"
#include "nsNetUtil.h"
#include "nsPrintfCString.h"
@ -81,15 +79,14 @@ static nsISubstitutingProtocolHandler* Proto() {
return sHandler;
}
bool ParseGlobs(GlobalObject& aGlobal,
Sequence<OwningMatchGlobOrUTF8String> aGlobs,
nsTArray<RefPtr<MatchGlobCore>>& aResult, ErrorResult& aRv) {
bool ParseGlobs(GlobalObject& aGlobal, Sequence<OwningMatchGlobOrString> aGlobs,
nsTArray<RefPtr<MatchGlob>>& aResult, ErrorResult& aRv) {
for (auto& elem : aGlobs) {
if (elem.IsMatchGlob()) {
aResult.AppendElement(elem.GetAsMatchGlob()->Core());
aResult.AppendElement(elem.GetAsMatchGlob());
} else {
RefPtr<MatchGlobCore> glob =
new MatchGlobCore(elem.GetAsUTF8String(), true, aRv);
RefPtr<MatchGlob> glob =
MatchGlob::Constructor(aGlobal, elem.GetAsString(), true, aRv);
if (aRv.Failed()) {
return false;
}
@ -149,11 +146,8 @@ WebAccessibleResource::WebAccessibleResource(
if (!aInit.mMatches.IsNull()) {
MatchPatternOptions options;
options.mRestrictSchemes = true;
RefPtr<MatchPatternSet> matches =
ParseMatches(aGlobal, aInit.mMatches.Value(), options,
ErrorBehavior::CreateEmptyPattern, aRv);
MOZ_DIAGNOSTIC_ASSERT(!aRv.Failed());
mMatches = matches->Core();
mMatches = ParseMatches(aGlobal, aInit.mMatches.Value(), options,
ErrorBehavior::CreateEmptyPattern, aRv);
}
if (!aInit.mExtension_ids.IsNull()) {
@ -165,46 +159,59 @@ bool WebAccessibleResource::IsExtensionMatch(const URLInfo& aURI) {
if (!mExtensionIDs) {
return false;
}
RefPtr<WebExtensionPolicyCore> policy =
ExtensionPolicyService::GetCoreByHost(aURI.Host());
WebExtensionPolicy* policy = EPS().GetByHost(aURI.Host());
return policy && (mExtensionIDs->Contains(nsGkAtoms::_asterisk) ||
mExtensionIDs->Contains(policy->Id()));
}
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(WebAccessibleResource)
NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTION(WebAccessibleResource)
NS_IMPL_CYCLE_COLLECTING_ADDREF(WebAccessibleResource)
NS_IMPL_CYCLE_COLLECTING_RELEASE(WebAccessibleResource)
/*****************************************************************************
* WebExtensionPolicyCore
* WebExtensionPolicy
*****************************************************************************/
WebExtensionPolicyCore::WebExtensionPolicyCore(GlobalObject& aGlobal,
WebExtensionPolicy* aPolicy,
const WebExtensionInit& aInit,
ErrorResult& aRv)
: mPolicy(aPolicy),
mId(NS_AtomizeMainThread(aInit.mId)),
WebExtensionPolicy::WebExtensionPolicy(GlobalObject& aGlobal,
const WebExtensionInit& aInit,
ErrorResult& aRv)
: mId(NS_AtomizeMainThread(aInit.mId)),
mName(aInit.mName),
mType(NS_AtomizeMainThread(aInit.mType)),
mManifestVersion(aInit.mManifestVersion),
mExtensionPageCSP(aInit.mExtensionPageCSP),
mLocalizeCallback(aInit.mLocalizeCallback),
mIsPrivileged(aInit.mIsPrivileged),
mTemporarilyInstalled(aInit.mTemporarilyInstalled),
mBackgroundWorkerScript(aInit.mBackgroundWorkerScript) {
mPermissions(new AtomSet(aInit.mPermissions)) {
MatchPatternOptions options;
options.mRestrictSchemes = !HasPermission(nsGkAtoms::mozillaAddons);
// In practice this is not necessary, but in tests where the uuid
// passed in is not lowercased various tests can fail.
ToLowerCase(aInit.mMozExtensionHostname, mHostname);
// Initialize the base CSP and extension page CSP
if (mManifestVersion < 3) {
nsresult rv = Preferences::GetString(BASE_CSP_PREF_V2, mBaseCSP);
if (NS_FAILED(rv)) {
mBaseCSP = NS_LITERAL_STRING_FROM_CSTRING(DEFAULT_BASE_CSP_V2);
}
} else {
nsresult rv = Preferences::GetString(BASE_CSP_PREF_V3, mBaseCSP);
if (NS_FAILED(rv)) {
mBaseCSP = NS_LITERAL_STRING_FROM_CSTRING(DEFAULT_BASE_CSP_V3);
}
mHostPermissions = ParseMatches(aGlobal, aInit.mAllowedOrigins, options,
ErrorBehavior::CreateEmptyPattern, aRv);
if (aRv.Failed()) {
return;
}
if (!aInit.mBackgroundScripts.IsNull()) {
mBackgroundScripts.SetValue().AppendElements(
aInit.mBackgroundScripts.Value());
}
if (!aInit.mBackgroundWorkerScript.IsEmpty()) {
mBackgroundWorkerScript.Assign(aInit.mBackgroundWorkerScript);
}
InitializeBaseCSP();
if (mExtensionPageCSP.IsVoid()) {
if (mManifestVersion < 3) {
EPS().GetDefaultCSP(mExtensionPageCSP);
@ -223,65 +230,6 @@ WebExtensionPolicyCore::WebExtensionPolicyCore(GlobalObject& aGlobal,
mWebAccessibleResources.AppendElement(std::move(resource));
}
nsresult rv = NS_NewURI(getter_AddRefs(mBaseURI), aInit.mBaseURL);
if (NS_FAILED(rv)) {
aRv.Throw(rv);
}
}
bool WebExtensionPolicyCore::SourceMayAccessPath(
const URLInfo& aURI, const nsACString& aPath) const {
if (aURI.Scheme() == nsGkAtoms::moz_extension &&
MozExtensionHostname().Equals(aURI.Host())) {
// An extension can always access it's own paths.
return true;
}
// Bug 1786564 Static themes need to allow access to theme resources.
if (Type() == nsGkAtoms::theme) {
RefPtr<WebExtensionPolicyCore> policyCore =
ExtensionPolicyService::GetCoreByHost(aURI.Host());
return policyCore != nullptr;
}
if (ManifestVersion() < 3) {
return IsWebAccessiblePath(aPath);
}
for (const auto& resource : mWebAccessibleResources) {
if (resource->SourceMayAccessPath(aURI, aPath)) {
return true;
}
}
return false;
}
/*****************************************************************************
* WebExtensionPolicy
*****************************************************************************/
WebExtensionPolicy::WebExtensionPolicy(GlobalObject& aGlobal,
const WebExtensionInit& aInit,
ErrorResult& aRv)
: mCore(new WebExtensionPolicyCore(aGlobal, this, aInit, aRv)),
mLocalizeCallback(aInit.mLocalizeCallback),
mPermissions(new AtomSet(aInit.mPermissions)) {
if (aRv.Failed()) {
return;
}
MatchPatternOptions options;
options.mRestrictSchemes = !HasPermission(nsGkAtoms::mozillaAddons);
mHostPermissions = ParseMatches(aGlobal, aInit.mAllowedOrigins, options,
ErrorBehavior::CreateEmptyPattern, aRv);
if (aRv.Failed()) {
return;
}
if (!aInit.mBackgroundScripts.IsNull()) {
mBackgroundScripts.SetValue().AppendElements(
aInit.mBackgroundScripts.Value());
}
mContentScripts.SetCapacity(aInit.mContentScripts.Length());
for (const auto& scriptInit : aInit.mContentScripts) {
// The activeTab permission is only for dynamically injected scripts,
@ -302,6 +250,11 @@ WebExtensionPolicy::WebExtensionPolicy(GlobalObject& aGlobal,
if (aInit.mReadyPromise.WasPassed()) {
mReadyPromise = &aInit.mReadyPromise.Value();
}
nsresult rv = NS_NewURI(getter_AddRefs(mBaseURI), aInit.mBaseURL);
if (NS_FAILED(rv)) {
aRv.Throw(rv);
}
}
already_AddRefed<WebExtensionPolicy> WebExtensionPolicy::Constructor(
@ -314,6 +267,22 @@ already_AddRefed<WebExtensionPolicy> WebExtensionPolicy::Constructor(
return policy.forget();
}
void WebExtensionPolicy::InitializeBaseCSP() {
if (mManifestVersion < 3) {
nsresult rv = Preferences::GetString(BASE_CSP_PREF_V2, mBaseCSP);
if (NS_FAILED(rv)) {
mBaseCSP.AssignLiteral(DEFAULT_BASE_CSP_V2);
}
return;
}
// Version 3 or higher.
nsresult rv = Preferences::GetString(BASE_CSP_PREF_V3, mBaseCSP);
if (NS_FAILED(rv)) {
mBaseCSP.AssignLiteral(DEFAULT_BASE_CSP_V3);
}
}
/* static */
void WebExtensionPolicy::GetActiveExtensions(
dom::GlobalObject& aGlobal,
@ -364,7 +333,7 @@ bool WebExtensionPolicy::Enable() {
mBrowsingContextGroup = group->MakeKeepAlivePtr();
}
Unused << Proto()->SetSubstitution(MozExtensionHostname(), BaseURI());
Unused << Proto()->SetSubstitution(MozExtensionHostname(), mBaseURI);
mActive = true;
return true;
@ -402,7 +371,7 @@ void WebExtensionPolicy::GetURL(const nsAString& aPath, nsAString& aResult,
Result<nsString, nsresult> WebExtensionPolicy::GetURL(
const nsAString& aPath) const {
nsPrintfCString spec("%s://%s/", kProto, MozExtensionHostname().get());
nsPrintfCString spec("%s://%s/", kProto, mHostname.get());
nsCOMPtr<nsIURI> uri;
MOZ_TRY(NS_NewURI(getter_AddRefs(uri), spec));
@ -471,6 +440,30 @@ bool WebExtensionPolicy::BackgroundServiceWorkerEnabled(GlobalObject& aGlobal) {
return StaticPrefs::extensions_backgroundServiceWorker_enabled_AtStartup();
}
// Decides whether a load whose source is aURI may reach the extension
// resource at aPath.
//
// The checks run in this order:
//   1. the source is this extension's own moz-extension origin -> allowed;
//   2. this policy's type is `theme` -> allowed iff the source host belongs
//      to some registered extension policy;
//   3. manifest version below 3 -> plain web-accessible path check;
//   4. otherwise -> allowed iff at least one web-accessible resource entry
//      accepts the (source, path) pair.
bool WebExtensionPolicy::SourceMayAccessPath(const URLInfo& aURI,
                                             const nsAString& aPath) const {
  // An extension can always access its own paths.
  const bool fromOwnOrigin = aURI.Scheme() == nsGkAtoms::moz_extension &&
                             mHostname.Equals(aURI.Host());
  if (fromOwnOrigin) {
    return true;
  }

  // Bug 1786564 Static themes need to allow access to theme resources.
  if (mType == nsGkAtoms::theme) {
    return EPS().GetByHost(aURI.Host()) != nullptr;
  }

  if (mManifestVersion >= 3) {
    // Manifest V3: every entry carries its own source restrictions.
    for (const auto& entry : mWebAccessibleResources) {
      if (entry->SourceMayAccessPath(aURI, aPath)) {
        return true;
      }
    }
    return false;
  }

  // Manifest V2: only the path matters.
  return IsWebAccessiblePath(aPath);
}
namespace {
/**
* Maintains a dynamically updated AtomSet based on the comma-separated
@ -639,26 +632,9 @@ uint64_t WebExtensionPolicy::GetBrowsingContextGroupId(ErrorResult& aRv) {
return 0;
}
WebExtensionPolicy::~WebExtensionPolicy() { mCore->ClearPolicyWeakRef(); }
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(WebExtensionPolicy)
NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(WebExtensionPolicy)
NS_IMPL_CYCLE_COLLECTION_UNLINK(mParent)
NS_IMPL_CYCLE_COLLECTION_UNLINK(mBrowsingContextGroup)
NS_IMPL_CYCLE_COLLECTION_UNLINK(mLocalizeCallback)
NS_IMPL_CYCLE_COLLECTION_UNLINK(mHostPermissions)
NS_IMPL_CYCLE_COLLECTION_UNLINK(mContentScripts)
NS_IMPL_CYCLE_COLLECTION_UNLINK_PRESERVED_WRAPPER
AssertIsOnMainThread();
tmp->mCore->ClearPolicyWeakRef();
NS_IMPL_CYCLE_COLLECTION_UNLINK_END
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(WebExtensionPolicy)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mParent)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mBrowsingContextGroup)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mLocalizeCallback)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mHostPermissions)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mContentScripts)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_WEAK_PTR(
WebExtensionPolicy, mParent, mBrowsingContextGroup, mLocalizeCallback,
mHostPermissions, mWebAccessibleResources, mContentScripts)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(WebExtensionPolicy)
NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
@ -848,11 +824,11 @@ bool MozDocumentMatcher::MatchesURI(const URLInfo& aURL,
return false;
}
if (!mIncludeGlobs.IsNull() && !mIncludeGlobs.Value().Matches(aURL.CSpec())) {
if (!mIncludeGlobs.IsNull() && !mIncludeGlobs.Value().Matches(aURL.Spec())) {
return false;
}
if (!mExcludeGlobs.IsNull() && mExcludeGlobs.Value().Matches(aURL.CSpec())) {
if (!mExcludeGlobs.IsNull() && mExcludeGlobs.Value().Matches(aURL.Spec())) {
return false;
}
@ -899,7 +875,8 @@ JSObject* WebExtensionContentScript::WrapObject(
}
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(MozDocumentMatcher, mMatches,
mExcludeMatches, mExtension)
mExcludeMatches, mIncludeGlobs,
mExcludeGlobs, mExtension)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(MozDocumentMatcher)
NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY

Просмотреть файл

@ -6,7 +6,6 @@
#ifndef mozilla_extensions_WebExtensionPolicy_h
#define mozilla_extensions_WebExtensionPolicy_h
#include "MainThreadUtils.h"
#include "mozilla/dom/BindingDeclarations.h"
#include "mozilla/dom/BrowsingContextGroup.h"
#include "mozilla/dom/Nullable.h"
@ -38,19 +37,20 @@ using dom::WebExtensionLocalizeCallback;
class DocInfo;
class WebExtensionContentScript;
class WebAccessibleResource final {
class WebAccessibleResource final : public nsISupports {
public:
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(WebAccessibleResource)
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_CLASS(WebAccessibleResource)
WebAccessibleResource(dom::GlobalObject& aGlobal,
const WebAccessibleResourceInit& aInit,
ErrorResult& aRv);
bool IsWebAccessiblePath(const nsACString& aPath) const {
bool IsWebAccessiblePath(const nsAString& aPath) const {
return mWebAccessiblePaths.Matches(aPath);
}
bool SourceMayAccessPath(const URLInfo& aURI, const nsACString& aPath) {
bool SourceMayAccessPath(const URLInfo& aURI, const nsAString& aPath) {
return mWebAccessiblePaths.Matches(aPath) &&
(IsHostMatch(aURI) || IsExtensionMatch(aURI));
}
@ -61,103 +61,18 @@ class WebAccessibleResource final {
bool IsExtensionMatch(const URLInfo& aURI);
private:
~WebAccessibleResource() = default;
protected:
virtual ~WebAccessibleResource() = default;
private:
MatchGlobSet mWebAccessiblePaths;
RefPtr<MatchPatternSetCore> mMatches;
RefPtr<MatchPatternSet> mMatches;
RefPtr<AtomSet> mExtensionIDs;
};
/// The thread-safe component of the WebExtensionPolicy.
///
/// Acts as a weak reference to the base WebExtensionPolicy.
class WebExtensionPolicyCore final {
public:
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(WebExtensionPolicyCore)
// Plain read-only accessors for the per-extension data stored below.
nsAtom* Id() const { return mId; }
const nsCString& MozExtensionHostname() const { return mHostname; }
nsIURI* BaseURI() const { return mBaseURI; }
bool IsPrivileged() { return mIsPrivileged; }
bool TemporarilyInstalled() { return mTemporarilyInstalled; }
const nsString& Name() const { return mName; }
nsAtom* Type() const { return mType; }
uint32_t ManifestVersion() const { return mManifestVersion; }
const nsString& ExtensionPageCSP() const { return mExtensionPageCSP; }
const nsString& BaseCSP() const { return mBaseCSP; }
const nsString& BackgroundWorkerScript() const {
return mBackgroundWorkerScript;
}
// Returns true if any entry in mWebAccessibleResources reports aPath as
// web accessible; the source of the request is not considered here.
bool IsWebAccessiblePath(const nsACString& aPath) const {
for (const auto& resource : mWebAccessibleResources) {
if (resource->IsWebAccessiblePath(aPath)) {
return true;
}
}
return false;
}
// Source-aware variant of the check above; defined out of line.
bool SourceMayAccessPath(const URLInfo& aURI, const nsACString& aPath) const;
// Try to get a reference to the cycle-collected main-thread-only
// WebExtensionPolicy instance.
//
// Will return nullptr if the policy has already been unlinked or destroyed.
WebExtensionPolicy* GetMainThreadPolicy() const
MOZ_REQUIRES(sMainThreadCapability) {
return mPolicy;
}
private:
// WebExtensionPolicy is the only class allowed to construct a core and to
// clear its back-pointer.
friend class WebExtensionPolicy;
WebExtensionPolicyCore(dom::GlobalObject& aGlobal,
WebExtensionPolicy* aPolicy,
const WebExtensionInit& aInit, ErrorResult& aRv);
~WebExtensionPolicyCore() = default;
// Drops the non-owning back-pointer to the main-thread policy; after this
// call GetMainThreadPolicy() returns nullptr.
void ClearPolicyWeakRef() MOZ_REQUIRES(sMainThreadCapability) {
mPolicy = nullptr;
}
// Unless otherwise guarded by a capability, all members on
// WebExtensionPolicyCore should be immutable and threadsafe.
WebExtensionPolicy* MOZ_NON_OWNING_REF mPolicy
MOZ_GUARDED_BY(sMainThreadCapability);
// NOTE(review): the members marked `/* const */` are not declared const;
// presumably they are assigned in the constructor body and never changed
// afterwards - confirm against the constructor definition.
const RefPtr<nsAtom> mId;
/* const */ nsCString mHostname;
/* const */ nsCOMPtr<nsIURI> mBaseURI;
const nsString mName;
const RefPtr<nsAtom> mType;
const uint32_t mManifestVersion;
/* const */ nsString mExtensionPageCSP;
/* const */ nsString mBaseCSP;
const bool mIsPrivileged;
const bool mTemporarilyInstalled;
const nsString mBackgroundWorkerScript;
/* const */ nsTArray<RefPtr<WebAccessibleResource>> mWebAccessibleResources;
};
class WebExtensionPolicy final : public nsISupports, public nsWrapperCache {
class WebExtensionPolicy final : public nsISupports,
public nsWrapperCache,
public SupportsWeakPtr {
public:
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(WebExtensionPolicy)
@ -168,26 +83,21 @@ class WebExtensionPolicy final : public nsISupports, public nsWrapperCache {
dom::GlobalObject& aGlobal, const WebExtensionInit& aInit,
ErrorResult& aRv);
WebExtensionPolicyCore* Core() const { return mCore; }
nsAtom* Id() const { return mId; }
void GetId(nsAString& aId) const { aId = nsDependentAtomString(mId); };
nsAtom* Id() const { return mCore->Id(); }
void GetId(nsAString& aId) const { aId = nsDependentAtomString(Id()); };
const nsCString& MozExtensionHostname() const {
return mCore->MozExtensionHostname();
}
const nsCString& MozExtensionHostname() const { return mHostname; }
void GetMozExtensionHostname(nsACString& aHostname) const {
aHostname = MozExtensionHostname();
}
nsIURI* BaseURI() const { return mCore->BaseURI(); }
void GetBaseURL(nsACString& aBaseURL) const {
MOZ_ALWAYS_SUCCEEDS(mCore->BaseURI()->GetSpec(aBaseURL));
MOZ_ALWAYS_SUCCEEDS(mBaseURI->GetSpec(aBaseURL));
}
bool IsPrivileged() { return mCore->IsPrivileged(); }
bool IsPrivileged() { return mIsPrivileged; }
bool TemporarilyInstalled() { return mCore->TemporarilyInstalled(); }
bool TemporarilyInstalled() { return mTemporarilyInstalled; }
void GetURL(const nsAString& aPath, nsAString& aURL, ErrorResult& aRv) const;
@ -205,13 +115,16 @@ class WebExtensionPolicy final : public nsISupports, public nsWrapperCache {
bool aCheckRestricted = true,
bool aAllowFilePermission = false) const;
bool IsWebAccessiblePath(const nsACString& aPath) const {
return mCore->IsWebAccessiblePath(aPath);
bool IsWebAccessiblePath(const nsAString& aPath) const {
for (const auto& resource : mWebAccessibleResources) {
if (resource->IsWebAccessiblePath(aPath)) {
return true;
}
}
return false;
}
bool SourceMayAccessPath(const URLInfo& aURI, const nsACString& aPath) const {
return mCore->SourceMayAccessPath(aURI, aPath);
}
bool SourceMayAccessPath(const URLInfo& aURI, const nsAString& aPath) const;
bool HasPermission(const nsAtom* aPermission) const {
return mPermissions->Contains(aPermission);
@ -228,21 +141,21 @@ class WebExtensionPolicy final : public nsISupports, public nsWrapperCache {
MOZ_CAN_RUN_SCRIPT
void Localize(const nsAString& aInput, nsString& aResult) const;
const nsString& Name() const { return mCore->Name(); }
void GetName(nsAString& aName) const { aName = Name(); }
const nsString& Name() const { return mName; }
void GetName(nsAString& aName) const { aName = mName; }
nsAtom* Type() const { return mCore->Type(); }
nsAtom* Type() const { return mType; }
void GetType(nsAString& aType) const {
aType = nsDependentAtomString(Type());
aType = nsDependentAtomString(mType);
};
uint32_t ManifestVersion() const { return mCore->ManifestVersion(); }
uint32_t ManifestVersion() const { return mManifestVersion; }
const nsString& ExtensionPageCSP() const { return mCore->ExtensionPageCSP(); }
void GetExtensionPageCSP(nsAString& aCSP) const { aCSP = ExtensionPageCSP(); }
const nsString& ExtensionPageCSP() const { return mExtensionPageCSP; }
void GetExtensionPageCSP(nsAString& aCSP) const { aCSP = mExtensionPageCSP; }
const nsString& BaseCSP() const { return mCore->BaseCSP(); }
void GetBaseCSP(nsAString& aCSP) const { aCSP = BaseCSP(); }
const nsString& BaseCSP() const { return mBaseCSP; }
void GetBaseCSP(nsAString& aCSP) const { aCSP = mBaseCSP; }
already_AddRefed<MatchPatternSet> AllowedOrigins() {
return do_AddRef(mHostPermissions);
@ -274,15 +187,12 @@ class WebExtensionPolicy final : public nsISupports, public nsWrapperCache {
JS::MutableHandle<JSObject*> aResult) const;
dom::Promise* ReadyPromise() const { return mReadyPromise; }
const nsString& BackgroundWorkerScript() const {
return mCore->BackgroundWorkerScript();
}
void GetBackgroundWorker(nsString& aScriptURL) const {
aScriptURL.Assign(BackgroundWorkerScript());
aScriptURL.Assign(mBackgroundWorkerScript);
}
bool IsManifestBackgroundWorker(const nsAString& aWorkerScriptURL) const {
return BackgroundWorkerScript().Equals(aWorkerScriptURL);
return mBackgroundWorkerScript.Equals(aWorkerScriptURL);
}
uint64_t GetBrowsingContextGroupId() const;
@ -315,7 +225,7 @@ class WebExtensionPolicy final : public nsISupports, public nsWrapperCache {
JS::Handle<JSObject*> aGivenProto) override;
protected:
~WebExtensionPolicy();
virtual ~WebExtensionPolicy() = default;
private:
WebExtensionPolicy(dom::GlobalObject& aGlobal, const WebExtensionInit& aInit,
@ -323,10 +233,19 @@ class WebExtensionPolicy final : public nsISupports, public nsWrapperCache {
bool Enable();
bool Disable();
void InitializeBaseCSP();
nsCOMPtr<nsISupports> mParent;
RefPtr<WebExtensionPolicyCore> mCore;
RefPtr<nsAtom> mId;
nsCString mHostname;
nsCOMPtr<nsIURI> mBaseURI;
nsString mName;
RefPtr<nsAtom> mType;
uint32_t mManifestVersion = 2;
nsString mExtensionPageCSP;
nsString mBaseCSP;
dom::BrowsingContextGroup::KeepAlivePtr mBrowsingContextGroup;
@ -334,11 +253,16 @@ class WebExtensionPolicy final : public nsISupports, public nsWrapperCache {
RefPtr<WebExtensionLocalizeCallback> mLocalizeCallback;
bool mIsPrivileged;
bool mTemporarilyInstalled;
RefPtr<AtomSet> mPermissions;
RefPtr<MatchPatternSet> mHostPermissions;
dom::Nullable<nsTArray<nsString>> mBackgroundScripts;
nsString mBackgroundWorkerScript;
nsTArray<RefPtr<WebAccessibleResource>> mWebAccessibleResources;
nsTArray<RefPtr<WebExtensionContentScript>> mContentScripts;
RefPtr<dom::Promise> mReadyPromise;

Просмотреть файл

@ -468,23 +468,12 @@ add_task(async function test_MatchGlob() {
});
add_task(async function test_MatchGlob_redundant_wildcards_backtracking() {
const first_limit = AppConstants.DEBUG ? 200 : 10;
{
// Bug 1570868 - repeated * in tabs.query glob causes too much backtracking.
let title = `Monster${"*".repeat(99)}Mash`;
// The first run could take longer than subsequent runs, as the DFA is lazily created.
let first_start = Date.now();
let glob = new MatchGlob(title);
let first_matches = glob.matches(title);
let first_duration = Date.now() - first_start;
ok(first_matches, `Expected match: ${title}, ${title}`);
ok(
first_duration < first_limit,
`First matching duration: ${first_duration}ms (limit: ${first_limit}ms)`
);
let start = Date.now();
let glob = new MatchGlob(title);
let matches = glob.matches(title);
let duration = Date.now() - start;
@ -495,18 +484,8 @@ add_task(async function test_MatchGlob_redundant_wildcards_backtracking() {
// Similarly with any continuous combination of ?**???****? wildcards.
let title = `Monster${"?*".repeat(99)}Mash`;
// The first run could take longer than subsequent runs, as the DFA is lazily created.
let first_start = Date.now();
let glob = new MatchGlob(title);
let first_matches = glob.matches(title);
let first_duration = Date.now() - first_start;
ok(first_matches, `Expected match: ${title}, ${title}`);
ok(
first_duration < first_limit,
`First matching duration: ${first_duration}ms (limit: ${first_limit}ms)`
);
let start = Date.now();
let glob = new MatchGlob(title);
let matches = glob.matches(title);
let duration = Date.now() - start;

Просмотреть файл

@ -17,7 +17,7 @@
#include "mozilla/dom/HTMLOptionElement.h"
#include "mozilla/dom/HTMLSelectElement.h"
#include "mozilla/HashTable.h"
#include "mozilla/RustRegex.h"
#include "mozilla/regex_ffi_generated.h"
#include "nsContentUtils.h"
#include "nsIFrame.h"
#include "nsIFrameInlines.h"
@ -25,6 +25,16 @@
#include "nsTStringHasher.h"
#include "mozilla/StaticPtr.h"
namespace mozilla {
template <>
class DefaultDelete<regex::ffi::RegexWrapper> {
public:
void operator()(regex::ffi::RegexWrapper* aPtr) const {
regex::ffi::regex_delete(aPtr);
}
};
} // namespace mozilla
namespace mozilla::dom {
static const char kWhitespace[] = "\b\t\r\n ";
@ -610,7 +620,7 @@ class FormAutofillImpl {
nsTArray<FormAutofillConfidences>& aResults, ErrorResult& aRv);
private:
const RustRegex& GetRegex(RegexKey key);
const regex::ffi::RegexWrapper& GetRegex(RegexKey key);
bool StringMatchesRegExp(const nsACString& str, RegexKey key);
bool StringMatchesRegExp(const nsAString& str, RegexKey key);
@ -657,7 +667,7 @@ class FormAutofillImpl {
// Array that holds RegexWrapper that created by regex::ffi::regex_new
using RegexWrapperArray =
EnumeratedArray<RegexKey, RegexKey::Count,
RustRegex>;
UniquePtr<regex::ffi::RegexWrapper>>;
RegexWrapperArray mRegexes;
};
@ -677,18 +687,16 @@ FormAutofillImpl::FormAutofillImpl() {
}
}
const RustRegex& FormAutofillImpl::GetRegex(RegexKey aKey) {
const regex::ffi::RegexWrapper& FormAutofillImpl::GetRegex(RegexKey aKey) {
if (!mRegexes[aKey]) {
RustRegex regex(mRuleMap[aKey], RustRegexOptions().CaseInsensitive(true));
MOZ_DIAGNOSTIC_ASSERT(regex);
mRegexes[aKey] = std::move(regex);
mRegexes[aKey].reset(regex::ffi::regex_new(&mRuleMap[aKey]));
}
return mRegexes[aKey];
return *mRegexes[aKey];
}
bool FormAutofillImpl::StringMatchesRegExp(const nsACString& aStr,
RegexKey aKey) {
return GetRegex(aKey).IsMatch(aStr);
return regex::ffi::regex_is_match(&GetRegex(aKey), &aStr);
}
bool FormAutofillImpl::StringMatchesRegExp(const nsAString& aStr,
@ -710,7 +718,7 @@ bool FormAutofillImpl::TextContentMatchesRegExp(Element& element,
size_t FormAutofillImpl::CountRegExpMatches(const nsACString& aStr,
RegexKey aKey) {
return GetRegex(aKey).CountMatches(aStr);
return regex::ffi::regex_count_matches(&GetRegex(aKey), &aStr);
}
size_t FormAutofillImpl::CountRegExpMatches(const nsAString& aStr,

Просмотреть файл

@ -136,5 +136,15 @@ if CONFIG["MOZ_BACKGROUNDTASKS"]:
if CONFIG["MOZ_BUILD_APP"] == "browser":
DIRS += ["corroborator"]
if CONFIG["COMPILE_ENVIRONMENT"]:
CbindgenHeader(
"regex_ffi_generated.h",
inputs=["/toolkit/components/regex-ffi"],
)
EXPORTS.mozilla += [
"!regex_ffi_generated.h",
]
if CONFIG["MOZ_UNIFFI_FIXTURES"]:
DIRS += ["uniffi-bindgen-gecko-js/fixtures"]

Просмотреть файл

@ -0,0 +1,10 @@
[package]
name = "regex-ffi"
version = "0.1.0"
authors = ["Doug Thayer <dothayer@mozilla.com>"]
edition = "2018"
license = "MPL-2.0"
[dependencies]
regex = "1"
nsstring = { path = "../../../xpcom/rust/nsstring" }

Просмотреть файл

@ -0,0 +1,12 @@
header = """/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */"""
autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */
"""
include_version = true
braces = "SameLine"
line_length = 100
tab_width = 2
language = "C++"
# Put FFI calls in the `mozilla::regex::ffi` namespace.
namespaces = ["mozilla", "regex", "ffi"]

Просмотреть файл

@ -0,0 +1,42 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
pub extern crate regex;
use nsstring::nsACString;
pub use regex::{Regex, RegexBuilder};
use std::ptr;
/// Heap-allocated holder for a compiled `Regex`.
///
/// Instances are created by `regex_new`, handed across the FFI boundary as a
/// raw pointer, and released by `regex_delete`.
pub struct RegexWrapper {
regex: Regex,
}
/// Compiles `pattern` as a case-insensitive regular expression.
///
/// On success the compiled regex is boxed and returned as a raw pointer that
/// the caller owns and must release with `regex_delete`. If the pattern does
/// not compile, a null pointer is returned.
#[no_mangle]
pub extern "C" fn regex_new(pattern: &nsACString) -> *mut RegexWrapper {
    let source = pattern.to_utf8();
    RegexBuilder::new(&source)
        .case_insensitive(true)
        .build()
        .map(|regex| Box::into_raw(Box::new(RegexWrapper { regex })))
        .unwrap_or(ptr::null_mut())
}
/// Releases a `RegexWrapper` previously returned by `regex_new`.
///
/// A null pointer is accepted and ignored: `regex_new` returns null when the
/// pattern fails to compile, and `Box::from_raw` must never be given null.
///
/// # Safety
///
/// `re` must be null or a pointer obtained from `regex_new` that has not
/// already been passed to this function.
#[no_mangle]
pub unsafe extern "C" fn regex_delete(re: *mut RegexWrapper) {
    if re.is_null() {
        return;
    }
    drop(Box::from_raw(re));
}
/// Returns true if the wrapped regex matches anywhere in `text`.
///
/// `text` is converted with `to_utf8()` before matching.
#[no_mangle]
pub extern "C" fn regex_is_match(re: &RegexWrapper, text: &nsACString) -> bool {
    let haystack = text.to_utf8();
    re.regex.is_match(&haystack)
}
/// Returns how many matches the wrapped regex's `find_iter` yields on `text`.
///
/// `text` is converted with `to_utf8()` before matching.
#[no_mangle]
pub extern "C" fn regex_count_matches(re: &RegexWrapper, text: &nsACString) -> usize {
    let haystack = text.to_utf8();
    re.regex.find_iter(&haystack).count()
}

Просмотреть файл

@ -46,9 +46,3 @@ if CONFIG["CPU_ARCH"] != "x86":
if CONFIG["MOZ_BITS_DOWNLOAD"]:
RUST_TESTS += ["bits_client"]
# Export the `rure` crate's included .h file. The symbols defined in that file
# will be exported from the `gkrust-shared` crate.
EXPORTS += [
"/third_party/rust/rure/include/rure.h",
]

Просмотреть файл

@ -66,7 +66,7 @@ unic-langid = { version = "0.9", features = ["likelysubtags"] }
unic-langid-ffi = { path = "../../../../intl/locale/rust/unic-langid-ffi" }
fluent-langneg = { version = "0.13", features = ["cldr"] }
fluent-langneg-ffi = { path = "../../../../intl/locale/rust/fluent-langneg-ffi" }
rure = "0.2.2"
regex-ffi = { path = "../../../components/regex-ffi" }
rust_minidump_writer_linux = { path = "../../../crashreporter/rust_minidump_writer_linux", optional = true }
gecko-profiler = { path = "../../../../tools/profiler/rust-api"}
midir_impl = { path = "../../../../dom/midi/midir_impl", optional = true }

Просмотреть файл

@ -73,7 +73,7 @@ extern crate fluent_langneg_ffi;
extern crate fluent;
extern crate fluent_ffi;
extern crate rure;
extern crate regex_ffi;
extern crate fluent_fallback;
extern crate l10nregistry_ffi;

Просмотреть файл

@ -1978,13 +1978,6 @@ STATIC_ATOMS = [
Atom("webRequestFilterResponse_serviceWorkerScript", "webRequestFilterResponse.serviceWorkerScript"),
Atom("http", "http"),
Atom("https", "https"),
Atom("ws", "ws"),
Atom("wss", "wss"),
Atom("ftp", "ftp"),
Atom("chrome", "chrome"),
Atom("moz", "moz"),
Atom("moz_icon", "moz-icon"),
Atom("moz_gio", "moz-gio"),
Atom("proxy", "proxy"),
Atom("privateBrowsingAllowedPermission", "internal:privateBrowsingAllowed"),
Atom("svgContextPropertiesAllowedPermission", "internal:svgContextPropertiesAllowed"),

Просмотреть файл

@ -1,708 +0,0 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef mozilla_RustRegex_h
#define mozilla_RustRegex_h
#include "nsPrintfCString.h"
#include "nsTArray.h"
#include "rure.h"
#include "mozilla/Maybe.h"
#include "mozilla/UniquePtr.h"
namespace mozilla {
// This header is a thin wrapper around the `rure.h` header file, which declares
// the C API for interacting with the rust `regex` crate. This is intended to
// make the type more ergonomic to use with mozilla types.
class RustRegex;
class RustRegexSet;
class RustRegexOptions;
class RustRegexCaptures;
class RustRegexIter;
class RustRegexIterCaptureNames;
using RustRegexMatch = rure_match;
/*
* RustRegexCaptures represents storage for sub-capture locations of a match.
*
* Computing the capture groups of a match can carry a significant performance
* penalty, so their use in the API is optional.
*
* A RustRegexCaptures value may outlive its corresponding RustRegex and can be
* freed independently.
*
* It is not safe to use from multiple threads simultaneously.
*/
class RustRegexCaptures final {
 public:
  // A default-constructed object holds no capture storage and is invalid.
  RustRegexCaptures() = default;

  // Reports whether this object wraps capture storage.
  bool IsValid() const { return mPtr != nullptr; }
  explicit operator bool() const { return IsValid(); }

  /*
   * Looks up the capturing group at index aIdx.
   *
   * Yields Some(match) when rure reports that the group took part in the
   * match; the RustRegexMatch then carries the start and end offsets (in
   * bytes). Yields Nothing() when this object is invalid, or when rure
   * reports no capture for aIdx (no such group, or the group did not
   * participate). A group exists if and only if aIdx is less than Length().
   *
   * Index 0 corresponds to the full match.
   */
  Maybe<RustRegexMatch> CaptureAt(size_t aIdx) const {
    if (!mPtr) {
      return Nothing();
    }
    RustRegexMatch group;
    if (!rure_captures_at(mPtr.get(), aIdx, &group)) {
      return Nothing();
    }
    return Some(group);
  }

  // Subscript shorthand for CaptureAt().
  Maybe<RustRegexMatch> operator[](size_t aIdx) const {
    return CaptureAt(aIdx);
  }

  /*
   * Number of capturing groups held by this object; 0 when invalid.
   */
  size_t Length() const {
    if (!mPtr) {
      return 0;
    }
    return rure_captures_len(mPtr.get());
  }

 private:
  friend class RustRegex;
  friend class RustRegexIter;

  // Allocates capture storage for aRe; a null aRe produces an invalid object.
  explicit RustRegexCaptures(rure* aRe)
      : mPtr(aRe ? rure_captures_new(aRe) : nullptr) {}

  // Releases the storage through the matching rure free function.
  struct Deleter {
    void operator()(rure_captures* aCaptures) const {
      rure_captures_free(aCaptures);
    }
  };

  UniquePtr<rure_captures, Deleter> mPtr;
};
/*
* RustRegexIterCaptureNames is an iterator over the list of capture group names
* in this particular RustRegex.
*
* A RustRegexIterCaptureNames value may not outlive its corresponding
* RustRegex, and should be destroyed before its corresponding RustRegex is
* destroyed.
*
* It is not safe to use from multiple threads simultaneously.
*/
class RustRegexIterCaptureNames {
public:
// Instances are only created by RustRegex (a friend) through the private
// constructor below.
RustRegexIterCaptureNames() = delete;
// Check if the `RustRegexIterCaptureNames` object is valid.
bool IsValid() const { return mPtr != nullptr; }
explicit operator bool() const { return IsValid(); }
/*
* Advances the iterator and returns Some(name) if and only if another
* capture group name exists; otherwise returns Nothing().
*
* Nothing() is also returned when this iterator is invalid.
*
* NOTE(review): the lifetime and ownership of the returned string are
* determined by rure_iter_capture_names_next - confirm against rure.h
* before holding on to the pointer.
*/
mozilla::Maybe<const char*> Next() {
char* next = nullptr;
if (mPtr && rure_iter_capture_names_next(mPtr.get(), &next)) {
return Some(next);
}
return Nothing();
}
private:
friend class RustRegex;
// Creates the underlying rure iterator for aRe; a null aRe produces an
// invalid iterator.
explicit RustRegexIterCaptureNames(rure* aRe)
: mPtr(aRe ? rure_iter_capture_names_new(aRe) : nullptr) {}
// Releases the iterator through the matching rure free function.
struct Deleter {
void operator()(rure_iter_capture_names* ptr) const {
rure_iter_capture_names_free(ptr);
}
};
UniquePtr<rure_iter_capture_names, Deleter> mPtr;
};
/*
* RustRegexIter is an iterator over successive non-overlapping matches in a
* particular haystack.
*
* A RustRegexIter value may not outlive its corresponding RustRegex and should
* be destroyed before its corresponding RustRegex is destroyed.
*
* It is not safe to use from multiple threads simultaneously.
*/
class RustRegexIter {
public:
// Instances are only created by RustRegex (a friend) through the private
// constructor below.
RustRegexIter() = delete;
// Check if the `RustRegexIter` object is valid.
bool IsValid() const { return mPtr != nullptr; }
explicit operator bool() const { return IsValid(); }
/*
* Next() returns Some if and only if this regex matches anywhere in haystack.
* The returned RustRegexMatch object contains the start and end offsets (in
* bytes) of the match.
*
* If no match is found, then subsequent calls will return Nothing()
* indefinitely.
*
* Next() should be preferred to NextCaptures() since it may be faster.
*
* N.B. The performance of this search is not impacted by the presence of
* capturing groups in your regular expression.
*/
mozilla::Maybe<RustRegexMatch> Next() {
RustRegexMatch match{};
if (mPtr &&
rure_iter_next(mPtr.get(), mHaystackPtr, mHaystackSize, &match)) {
return Some(match);
}
return Nothing();
}
/*
* NextCaptures returns a valid RustRegexCaptures if and only if this regex
* matches anywhere in haystack. If a match is found, then all of its capture
* locations are stored in the returned RustRegexCaptures object.
*
* If no match is found, then subsequent calls will return an invalid
* `RustRegexCaptures` indefinitely.
*
* Only use this function if you specifically need access to capture
* locations. It is not necessary to use this function just because your
* regular expression contains capturing groups.
*
* Capture locations can be accessed using the methods on RustRegexCaptures.
*
* N.B. The performance of this search can be impacted by the number of
* capturing groups. If you're using this function, it may be beneficial to
* use non-capturing groups (e.g., `(?:re)`) where possible.
*/
RustRegexCaptures NextCaptures() {
// Fresh capture storage is allocated on every call, before the search runs.
RustRegexCaptures captures(mRe);
if (mPtr && rure_iter_next_captures(mPtr.get(), mHaystackPtr, mHaystackSize,
captures.mPtr.get())) {
return captures;
}
return {};
}
private:
friend class RustRegex;
// Creates an iterator over aHaystack for aRe; a null aRe produces an invalid
// iterator.
//
// The haystack is borrowed, not copied: only aHaystack's data pointer and
// size are stored, so the underlying buffer must stay alive and unchanged
// for as long as Next()/NextCaptures() may be called.
RustRegexIter(rure* aRe, const std::string_view& aHaystack)
: mRe(aRe),
mHaystackPtr(reinterpret_cast<const uint8_t*>(aHaystack.data())),
mHaystackSize(aHaystack.size()),
mPtr(aRe ? rure_iter_new(aRe) : nullptr) {}
// Non-owning pointers to the regex and to the haystack bytes.
rure* MOZ_NON_OWNING_REF mRe;
const uint8_t* MOZ_NON_OWNING_REF mHaystackPtr;
size_t mHaystackSize;
// Releases the iterator through the matching rure free function.
struct Deleter {
void operator()(rure_iter* ptr) const { rure_iter_free(ptr); }
};
UniquePtr<rure_iter, Deleter> mPtr;
};
/*
* RustRegexOptions is the set of configuration options for compiling a regular
* expression.
*
* All flags on this type can be used to set default flags while compiling, and
* can be toggled in the expression itself using standard syntax, e.g. `(?i)`
* turns case-insensitive matching on, and `(?-i)` disables it.
*
* In addition, two non-flag options are available: setting the size limit of
* the compiled program and setting the size limit of the cache of states that
* the DFA uses while searching.
*
* For most uses, the default settings will work fine, and a default-constructed
* RustRegexOptions can be passed.
*/
class RustRegexOptions {
public:
RustRegexOptions() = default;
/*
* Set the value for the case insensitive (i) flag.
*
* When enabled, letters in the pattern will match both upper case and lower
* case variants.
*/
RustRegexOptions& CaseInsensitive(bool aYes) {
return SetFlag(aYes, RURE_FLAG_CASEI);
}
/*
* Set the value for the multi-line matching (m) flag.
*
* When enabled, ^ matches the beginning of lines and $ matches the end of
* lines.
*
* By default, they match beginning/end of the input.
*/
RustRegexOptions& MultiLine(bool aYes) {
return SetFlag(aYes, RURE_FLAG_MULTI);
}
/*
* Set the value for the any character (s) flag, where in . matches anything
* when s is set and matches anything except for new line when it is not set
* (the default).
*
* N.B. matches anything means any byte when Unicode is disabled and means
* any valid UTF-8 encoding of any Unicode scalar value when Unicode is
* enabled.
*/
RustRegexOptions& DotMatchesNewLine(bool aYes) {
return SetFlag(aYes, RURE_FLAG_DOTNL);
}
/*
* Set the value for the greedy swap (U) flag.
*
* When enabled, a pattern like a* is lazy (tries to find shortest match) and
* a*? is greedy (tries to find longest match).
*
* By default, a* is greedy and a*? is lazy.
*/
RustRegexOptions& SwapGreed(bool aYes) {
return SetFlag(aYes, RURE_FLAG_SWAP_GREED);
}
/*
* Set the value for the ignore whitespace (x) flag.
*
* When enabled, whitespace such as new lines and spaces will be ignored
* between expressions of the pattern, and # can be used to start a comment
* until the next new line.
*/
RustRegexOptions& IgnoreWhitespace(bool aYes) {
return SetFlag(aYes, RURE_FLAG_SPACE);
}
/*
* Set the value for the Unicode (u) flag.
*
* Enabled by default. When disabled, character classes such as \w only match
* ASCII word characters instead of all Unicode word characters.
*/
RustRegexOptions& Unicode(bool aYes) {
return SetFlag(aYes, RURE_FLAG_UNICODE);
}
/*
* SizeLimit sets the appoximate size limit of the compiled regular
* expression.
*
* This size limit roughly corresponds to the number of bytes occupied by
* a single compiled program. If the program would exceed this number,
* then an invalid RustRegex will be constructed.
*/
RustRegexOptions& SizeLimit(size_t aLimit) {
mSizeLimit = Some(aLimit);
return *this;
}
/*
* DFASizeLimit sets the approximate size of the cache used by the DFA during
* search.
*
* This roughly corresponds to the number of bytes that the DFA will use while
* searching.
*
* Note that this is a *per thread* limit. There is no way to set a global
* limit. In particular, if a regular expression is used from multiple threads
* simultaneously, then each thread may use up to the number of bytes
* specified here.
*/
RustRegexOptions& DFASizeLimit(size_t aLimit) {
mDFASizeLimit = Some(aLimit);
return *this;
}
private:
friend class RustRegex;
friend class RustRegexSet;
// Deleter used with UniquePtr<rure_options, ...>: releases the handle through
// rure_options_free().
struct OptionsDeleter {
  void operator()(rure_options* aOptions) const {
    rure_options_free(aOptions);
  }
};
// Build the rure_options handle describing the configured size limits.
//
// Returns an empty (null) pointer when neither SizeLimit() nor DFASizeLimit()
// has been set; otherwise returns a freshly allocated handle carrying exactly
// the limits that were set. The handle is freed by OptionsDeleter.
UniquePtr<rure_options, OptionsDeleter> GetOptions() const {
  using OptionsPtr = UniquePtr<rure_options, OptionsDeleter>;

  const bool hasSizeLimit = mSizeLimit.isSome();
  const bool hasDFASizeLimit = mDFASizeLimit.isSome();
  if (!hasSizeLimit && !hasDFASizeLimit) {
    // Nothing to configure: callers pass the null handle straight to rure.
    return OptionsPtr();
  }

  OptionsPtr handle(rure_options_new());
  if (hasSizeLimit) {
    rure_options_size_limit(handle.get(), *mSizeLimit);
  }
  if (hasDFASizeLimit) {
    rure_options_dfa_size_limit(handle.get(), *mDFASizeLimit);
  }
  return handle;
}
// Returns the current flag bitmask (starts as RURE_DEFAULT_FLAGS and is
// modified through SetFlag()).
uint32_t GetFlags() const { return mFlags; }
// Turn the bit(s) in aFlag on (aYes == true) or off (aYes == false) in mFlags
// and return this object so that calls can be chained.
RustRegexOptions& SetFlag(bool aYes, uint32_t aFlag) {
  mFlags = aYes ? (mFlags | aFlag) : (mFlags & ~aFlag);
  return *this;
}
uint32_t mFlags = RURE_DEFAULT_FLAGS;
Maybe<size_t> mSizeLimit;
Maybe<size_t> mDFASizeLimit;
};
/*
* RustRegex is the type of a compiled regular expression.
*
* A RustRegex can be safely used from multiple threads simultaneously.
*
* When calling the matching methods on this type, they will generally have the
* following parameters:
*
* aHaystack
* may contain arbitrary bytes, but ASCII compatible text is more useful.
* UTF-8 is even more useful. Other text encodings aren't supported.
*
* aStart
* the position in bytes at which to start searching. Note that setting the
* start position is distinct from using a substring for `aHaystack`, since
* the regex engine may look at bytes before the start position to determine
* match information. For example, if the start position is greater than 0,
* then the \A ("begin text") anchor can never match.
*/
class RustRegex final {
public:
// Create a new invalid RustRegex object
RustRegex() = default;
/*
* Compiles the given pattern into a regular expression. The pattern must be
* valid UTF-8 and the length corresponds to the number of bytes in the
* pattern.
*
* If an error occurs, the constructed RustRegex will be `!IsValid()`.
*
* The compiled expression returned may be used from multiple threads
* simultaneously.
*/
explicit RustRegex(const std::string_view& aPattern,
const RustRegexOptions& aOptions = {}) {
#ifdef DEBUG
rure_error* error = rure_error_new();
#else
rure_error* error = nullptr;
#endif
mPtr.reset(rure_compile(reinterpret_cast<const uint8_t*>(aPattern.data()),
aPattern.size(), aOptions.GetFlags(),
aOptions.GetOptions().get(), error));
#ifdef DEBUG
if (!mPtr) {
NS_WARNING(nsPrintfCString("RustRegex compile failed: %s",
rure_error_message(error))
.get());
}
rure_error_free(error);
#endif
}
// Check if the compiled `RustRegex` is valid.
bool IsValid() const { return mPtr != nullptr; }
explicit operator bool() const { return IsValid(); }
/*
* IsMatch returns true if and only if this regex matches anywhere in
* aHaystack.
*
* See the type-level comment for details on aHaystack and aStart.
*
* IsMatch() should be preferred to Find() since it may be faster.
*
* N.B. The performance of this search is not impacted by the presence of
* capturing groups in your regular expression.
*/
bool IsMatch(const std::string_view& aHaystack, size_t aStart = 0) const {
return mPtr &&
rure_is_match(mPtr.get(),
reinterpret_cast<const uint8_t*>(aHaystack.data()),
aHaystack.size(), aStart);
}
/*
* Find returns Some if and only if this regex matches anywhere in
* haystack. The returned RustRegexMatch object contains the start and end
* offsets (in bytes) of the match.
*
* See the type-level comment for details on aHaystack and aStart.
*
* Find() should be preferred to FindCaptures() since it may be faster.
*
* N.B. The performance of this search is not impacted by the presence of
* capturing groups in your regular expression.
*/
Maybe<RustRegexMatch> Find(const std::string_view& aHaystack,
size_t aStart = 0) const {
RustRegexMatch match{};
if (mPtr && rure_find(mPtr.get(),
reinterpret_cast<const uint8_t*>(aHaystack.data()),
aHaystack.size(), aStart, &match)) {
return Some(match);
}
return Nothing();
}
/*
* FindCaptures() returns a valid RustRegexCaptures if and only if this
* regex matches anywhere in haystack. If a match is found, then all of its
* capture locations are stored in the returned RustRegexCaptures object.
*
* See the type-level comment for details on aHaystack and aStart.
*
* Only use this function if you specifically need access to capture
* locations. It is not necessary to use this function just because your
* regular expression contains capturing groups.
*
* Capture locations can be accessed using the methods on RustRegexCaptures.
*
* N.B. The performance of this search can be impacted by the number of
* capturing groups. If you're using this function, it may be beneficial to
* use non-capturing groups (e.g., `(?:re)`) where possible.
*/
RustRegexCaptures FindCaptures(const std::string_view& aHaystack,
size_t aStart = 0) const {
RustRegexCaptures captures(mPtr.get());
if (mPtr &&
rure_find_captures(mPtr.get(),
reinterpret_cast<const uint8_t*>(aHaystack.data()),
aHaystack.size(), aStart, captures.mPtr.get())) {
return captures;
}
return {};
}
/*
* ShortestMatch() returns Some if and only if this regex matches anywhere
* in haystack. If a match is found, then its end location is stored in the
* pointer given. The end location is the place at which the regex engine
* determined that a match exists, but may occur before the end of the
* proper leftmost-first match.
*
* See the type-level comment for details on aHaystack and aStart.
*
* ShortestMatch should be preferred to Find since it may be faster.
*
* N.B. The performance of this search is not impacted by the presence of
* capturing groups in your regular expression.
*/
Maybe<size_t> ShortestMatch(const std::string_view& aHaystack,
size_t aStart = 0) const {
size_t end = 0;
if (mPtr &&
rure_shortest_match(mPtr.get(),
reinterpret_cast<const uint8_t*>(aHaystack.data()),
aHaystack.size(), aStart, &end)) {
return Some(end);
}
return Nothing();
}
/*
* Create an iterator over all successive non-overlapping matches of this
* regex in aHaystack.
*
* See the type-level comment for details on aHaystack.
*
* Both aHaystack and this regex must remain valid until the returned
* `RustRegexIter` is destroyed.
*/
RustRegexIter IterMatches(const std::string_view& aHaystack) const {
return RustRegexIter(mPtr.get(), aHaystack);
}
/*
* Returns the capture index for the name given. If no such named capturing
* group exists in this regex, then -1 is returned.
*
* The capture index may be used with RustRegexCaptures::CaptureAt.
*
* This function never returns 0 since the first capture group always
* corresponds to the entire match and is always unnamed.
*/
int32_t CaptureNameIndex(const char* aName) const {
return mPtr ? rure_capture_name_index(mPtr.get(), aName) : -1;
}
/*
* Create an iterator over the list of capture group names in this particular
* regex.
*
* This regex must remain valid until the returned `RustRegexIterCaptureNames`
* is destroyed.
*/
RustRegexIterCaptureNames IterCaptureNames() const {
return RustRegexIterCaptureNames(mPtr.get());
}
/*
* Count the number of successive non-overlapping matches of this regex in
* aHaystack.
*
* See the type-level comment for details on aHaystack.
*/
size_t CountMatches(const std::string_view& aHaystack) const {
size_t count = 0;
auto iter = IterMatches(aHaystack);
while (iter.Next()) {
count++;
}
return count;
}
private:
struct Deleter {
void operator()(rure* ptr) const { rure_free(ptr); }
};
UniquePtr<rure, Deleter> mPtr;
};
/*
* RustRegexSet is the type of a set of compiled regular expressions.
*
* A RustRegexSet can be safely used from multiple threads simultaneously.
*
* When calling the matching methods on this type, they will generally have the
* following parameters:
*
* aHaystack
* may contain arbitrary bytes, but ASCII compatible text is more useful.
* UTF-8 is even more useful. Other text encodings aren't supported.
*
* aStart
* the position in bytes at which to start searching. Note that setting the
* start position is distinct from using a substring for `aHaystack`, since
* the regex engine may look at bytes before the start position to determine
* match information. For example, if the start position is greater than 0,
* then the \A ("begin text") anchor can never match.
*/
class RustRegexSet final {
 public:
  /*
   * Compiles the given range of patterns into a single regular expression set
   * which can be matched in a linear scan. Each element of aPatterns must be
   * valid UTF-8 and implicitly convertible to `std::string_view`.
   *
   * Only pointers into the pattern data are collected while iterating, so the
   * elements of aPatterns must stay alive until this constructor returns.
   *
   * If an error occurs, the constructed RustRegexSet will be `!IsValid()`. In
   * DEBUG builds the compiler's error message is additionally reported
   * through NS_WARNING.
   *
   * The compiled set may be used from multiple threads simultaneously.
   */
  template <typename Patterns>
  explicit RustRegexSet(Patterns&& aPatterns,
                        const RustRegexOptions& aOptions = {}) {
    // Error details are only collected in DEBUG builds.
#ifdef DEBUG
    rure_error* error = rure_error_new();
#else
    rure_error* error = nullptr;
#endif
    AutoTArray<const uint8_t*, 4> patternPtrs;
    AutoTArray<size_t, 4> patternSizes;
    for (auto&& pattern : std::forward<Patterns>(aPatterns)) {
      // Read the bytes through the string_view conversion rather than calling
      // data()/size() on the element itself. This is what makes every type
      // that converts to std::string_view (e.g. `const char*`) acceptable, as
      // the contract above promises.
      std::string_view view = pattern;
      patternPtrs.AppendElement(reinterpret_cast<const uint8_t*>(view.data()));
      patternSizes.AppendElement(view.size());
    }
    // The temporary options handle lives until the end of this statement,
    // i.e. for the whole duration of the compile call.
    mPtr.reset(rure_compile_set(patternPtrs.Elements(), patternSizes.Elements(),
                                patternPtrs.Length(), aOptions.GetFlags(),
                                aOptions.GetOptions().get(), error));
#ifdef DEBUG
    if (!mPtr) {
      NS_WARNING(nsPrintfCString("RustRegexSet compile failed: %s",
                                 rure_error_message(error))
                     .get());
    }
    rure_error_free(error);
#endif
  }

  // Check if the `RustRegexSet` object is valid.
  bool IsValid() const { return mPtr != nullptr; }
  explicit operator bool() const { return IsValid(); }

  /*
   * IsMatch returns true if and only if any regex within the set matches
   * anywhere in the haystack. Once a match has been located, the matching
   * engine will quit immediately. Always false for an invalid set.
   *
   * See the type-level comment for details on aHaystack and aStart.
   */
  bool IsMatch(const std::string_view& aHaystack, size_t aStart = 0) const {
    return mPtr &&
           rure_set_is_match(mPtr.get(),
                             reinterpret_cast<const uint8_t*>(aHaystack.data()),
                             aHaystack.size(), aStart);
  }

  // Result of Matches(): whether anything matched, plus one entry per pattern.
  struct SetMatches {
    bool matchedAny = false;
    nsTArray<bool> matches;
  };

  /*
   * Matches() compares each regex in the set against the haystack and
   * returns a list with the match result of each pattern. Match results are
   * ordered in the same way as the regex set was compiled. For example, index 0
   * of matches corresponds to the first pattern passed to the constructor.
   *
   * For an invalid set the result has `matchedAny == false` and an empty
   * `matches` array.
   *
   * See the type-level comment for details on aHaystack and aStart.
   *
   * Only use this function if you specifically need to know which regexes
   * matched within the set. To determine if any of the regexes matched without
   * caring which, use IsMatch.
   */
  SetMatches Matches(const std::string_view& aHaystack,
                     size_t aStart = 0) const {
    nsTArray<bool> matches;
    // One slot per compiled pattern (zero slots when the set is invalid); the
    // search below writes its per-pattern results into this buffer.
    matches.SetLength(Length());
    bool any = mPtr && rure_set_matches(
                           mPtr.get(),
                           reinterpret_cast<const uint8_t*>(aHaystack.data()),
                           aHaystack.size(), aStart, matches.Elements());
    return SetMatches{any, std::move(matches)};
  }

  /*
   * Returns the number of patterns the regex set was compiled with, or 0 for
   * an invalid set.
   */
  size_t Length() const { return mPtr ? rure_set_len(mPtr.get()) : 0; }

 private:
  // Releases the compiled set through rure_set_free().
  struct Deleter {
    void operator()(rure_set* ptr) const { rure_set_free(ptr); }
  };
  UniquePtr<rure_set, Deleter> mPtr;
};
} // namespace mozilla
#endif // mozilla_RustRegex_h

Просмотреть файл

@ -35,10 +35,6 @@ EXPORTS += [
"nsUTF8Utils.h",
]
EXPORTS.mozilla += [
"RustRegex.h",
]
UNIFIED_SOURCES += [
"nsASCIIMask.cpp",
"nsReadableUtils.cpp",

Просмотреть файл

@ -1,181 +0,0 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "gtest/gtest.h"
#include "mozilla/RustRegex.h"
// This file is adapted from the test.c file in the `rure` crate, but modified
// to use gtest and the `RustRegex` wrapper.
namespace mozilla {
TEST(TestRustRegex, IsMatch)
{
  // The haystack ends with a three-byte UTF-8 sequence which the end-anchored
  // \p{So} class is expected to match.
  RustRegex symbolAtEnd("\\p{So}$");
  ASSERT_TRUE(symbolAtEnd.IsValid());

  const bool matched = symbolAtEnd.IsMatch("snowman: \xE2\x98\x83");
  ASSERT_TRUE(matched);
}
TEST(TestRustRegex, ShortestMatch)
{
  RustRegex oneOrMoreA("a+");
  ASSERT_TRUE(oneOrMoreA.IsValid());

  // ShortestMatch reports where a match was first known to exist, so the
  // expected end offset is 1 even though the haystack holds five 'a's.
  Maybe<size_t> matchEnd = oneOrMoreA.ShortestMatch("aaaaa");
  ASSERT_TRUE(matchEnd);
  EXPECT_EQ(*matchEnd, 1u);
}
TEST(TestRustRegex, Find)
{
  RustRegex symbolAtEnd("\\p{So}$");
  ASSERT_TRUE(symbolAtEnd.IsValid());

  // Offsets are in bytes: "snowman: " occupies bytes 0..9 and the trailing
  // three-byte sequence occupies bytes 9..12.
  auto found = symbolAtEnd.Find("snowman: \xE2\x98\x83");
  ASSERT_TRUE(found);
  EXPECT_EQ(found->start, 9u);
  EXPECT_EQ(found->end, 12u);
}
TEST(TestRustRegex, Captures)
{
  RustRegex pattern(".(.*(?P<snowman>\\p{So}))$");
  ASSERT_TRUE(pattern);

  auto groups = pattern.FindCaptures("snowman: \xE2\x98\x83");
  ASSERT_TRUE(groups);

  // Three groups are expected: the whole match plus the two parenthesised
  // groups, the second of which is named "snowman".
  EXPECT_EQ(groups.Length(), 3u);
  EXPECT_EQ(pattern.CaptureNameIndex("snowman"), 2);

  // The named group covers the trailing three bytes of the haystack.
  auto snowman = groups[2];
  ASSERT_TRUE(snowman);
  EXPECT_EQ(snowman->start, 9u);
  EXPECT_EQ(snowman->end, 12u);
}
TEST(TestRustRegex, Iter)
{
  RustRegex words("\\w+(\\w)");
  ASSERT_TRUE(words);

  auto cursor = words.IterMatches("abc xyz");
  ASSERT_TRUE(cursor);

  // First step: a plain match, expected to span bytes 0..3.
  auto firstWord = cursor.Next();
  ASSERT_TRUE(firstWord);
  EXPECT_EQ(firstWord->start, 0u);
  EXPECT_EQ(firstWord->end, 3u);

  // Second step on the same iterator, this time asking for captures: group 1
  // is expected to span bytes 6..7.
  auto secondWord = cursor.NextCaptures();
  ASSERT_TRUE(secondWord);
  auto lastLetter = secondWord[1];
  ASSERT_TRUE(lastLetter);
  EXPECT_EQ(lastLetter->start, 6u);
  EXPECT_EQ(lastLetter->end, 7u);
}
TEST(TestRustRegex, IterCaptureNames)
{
  RustRegex date("(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})");
  ASSERT_TRUE(date);

  auto names = date.IterCaptureNames();

  // Names are expected in group order; the first entry is the empty name of
  // the implicit whole-match group.
  const char* const expectedNames[] = {"", "year", "month", "day"};
  for (const char* expected : expectedNames) {
    Maybe<const char*> name = names.Next();
    ASSERT_TRUE(name.isSome());
    EXPECT_STREQ(*name, expected);
  }

  // After the last group the iterator must be exhausted.
  Maybe<const char*> pastEnd = names.Next();
  ASSERT_TRUE(pastEnd.isNothing());
}
/*
* This tests whether we can set the flags correctly. In this case, we disable
* all flags, which includes disabling Unicode mode. When we disable Unicode
* mode, we can match arbitrary possibly invalid UTF-8 bytes, such as \xFF.
* (When Unicode mode is enabled, \xFF won't match .)
*/
TEST(TestRustRegex, Flags)
{
  {
    // Default options: `.` must not match the lone byte \xFF.
    RustRegex defaultDot(".");
    ASSERT_TRUE(defaultDot);
    ASSERT_FALSE(defaultDot.IsMatch("\xFF"));
  }
  {
    // With the Unicode flag cleared, `.` must match the lone byte \xFF.
    RustRegexOptions unicodeOff;
    unicodeOff.Unicode(false);
    RustRegex byteDot(".", unicodeOff);
    ASSERT_TRUE(byteDot);
    ASSERT_TRUE(byteDot.IsMatch("\xFF"));
  }
}
TEST(TestRustRegex, CompileErrorSizeLimit)
{
  // Compilation is expected to fail under a program size limit of 0, leaving
  // an invalid regex behind.
  RustRegexOptions zeroSizeLimit;
  zeroSizeLimit.SizeLimit(0);
  RustRegex tooLarge("\\w{100}", zeroSizeLimit);
  EXPECT_FALSE(tooLarge);
}
TEST(TestRustRegex, SetMatches)
{
  RustRegexSet patterns(nsTArray<std::string_view>{
      "foo", "barfoo", "\\w+", "\\d+", "foobar", "bar"});
  ASSERT_TRUE(patterns);
  EXPECT_EQ(patterns.Length(), 6u);

  // Any-match queries.
  EXPECT_TRUE(patterns.IsMatch("foobar"));
  EXPECT_FALSE(patterns.IsMatch(""));

  // Per-pattern results, in the order the patterns were given above.
  auto result = patterns.Matches("foobar");
  EXPECT_TRUE(result.matchedAny);
  EXPECT_EQ(result.matches.Length(), 6u);
  nsTArray<bool> expected{true, false, true, false, true, true};
  EXPECT_EQ(result.matches, expected);
}
TEST(TestRustRegex, SetMatchStart)
{
  RustRegexSet patterns(nsTArray<std::string_view>{"foo", "bar", "fooo"});
  EXPECT_TRUE(patterns);
  EXPECT_EQ(patterns.Length(), 3u);

  // Searching from byte offset 2, none of the patterns is expected to match.
  EXPECT_FALSE(patterns.IsMatch("foobiasdr", 2));

  {
    // From the beginning of the haystack all three patterns match.
    auto fromStart = patterns.Matches("fooobar");
    EXPECT_TRUE(fromStart.matchedAny);
    nsTArray<bool> expected{true, true, true};
    EXPECT_EQ(fromStart.matches, expected);
  }
  {
    // From byte offset 1 only "bar" is still expected to match.
    auto fromOne = patterns.Matches("fooobar", 1);
    EXPECT_TRUE(fromOne.matchedAny);
    nsTArray<bool> expected{false, true, false};
    EXPECT_EQ(fromOne.matches, expected);
  }
}
TEST(TestRustRegex, RegexSetOptions)
{
  // Options are honoured by sets too: compilation is expected to fail under a
  // program size limit of 0, leaving an invalid set behind.
  RustRegexOptions zeroSizeLimit;
  zeroSizeLimit.SizeLimit(0);
  RustRegexSet tooLarge(nsTArray<std::string_view>{"\\w{100}"}, zeroSizeLimit);
  EXPECT_FALSE(tooLarge);
}
} // namespace mozilla

Просмотреть файл

@ -45,7 +45,6 @@ UNIFIED_SOURCES += [
"TestQueue.cpp",
"TestRacingServiceManager.cpp",
"TestRecursiveMutex.cpp",
"TestRustRegex.cpp",
"TestRWLock.cpp",
"TestSegmentedBuffer.cpp",
"TestSlicedInputStream.cpp",