Bug 1857742 - patch 2 - Use oxilangtag rather than unic_langid to parse lang tags for nsStyleUtil::LangTagCompare. r=layout-reviewers,dholbert,TYLin

Differential Revision: https://phabricator.services.mozilla.com/D193892
This commit is contained in:
Jonathan Kew 2023-11-18 10:36:01 +00:00
Родитель 94a3c09204
Коммит 20b8ef07ee
8 изменённых файлов: 172 добавлений и 89 удалений

9
Cargo.lock сгенерированный
Просмотреть файл

@ -2208,6 +2208,7 @@ dependencies = [
"oblivious_http",
"origin-trials-ffi",
"oxilangtag",
"oxilangtag-ffi",
"prefs_parser",
"processtools",
"profiler_helper",
@ -4139,6 +4140,14 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d91edf4fbb970279443471345a4e8c491bf05bb283b3e6c88e4e606fd8c181b"
[[package]]
name = "oxilangtag-ffi"
version = "0.1.0"
dependencies = [
"nsstring",
"oxilangtag",
]
[[package]]
name = "packed_simd"
version = "0.3.9"

Просмотреть файл

@ -83,11 +83,15 @@ if CONFIG["COMPILE_ENVIRONMENT"]:
"fluent_langneg_ffi_generated.h",
inputs=["/intl/locale/rust/fluent-langneg-ffi"],
)
CbindgenHeader(
"oxilangtag_ffi_generated.h", inputs=["/intl/locale/rust/oxilangtag-ffi"]
)
CbindgenHeader(
"unic_langid_ffi_generated.h", inputs=["/intl/locale/rust/unic-langid-ffi"]
)
EXPORTS.mozilla.intl += [
"!fluent_langneg_ffi_generated.h",
"!oxilangtag_ffi_generated.h",
"!unic_langid_ffi_generated.h",
]

Просмотреть файл

@ -0,0 +1,10 @@
[package]
name = "oxilangtag-ffi"
version = "0.1.0"
license = "MPL-2.0"
authors = ["Jonathan Kew <jkew@mozilla.com>"]
edition = "2021"
[dependencies]
nsstring = { path = "../../../../xpcom/rust/nsstring" }
oxilangtag = "0.1.3"

Просмотреть файл

@ -0,0 +1,15 @@
header = """/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */"""
autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */
"""
include_version = true
braces = "SameLine"
line_length = 100
tab_width = 2
language = "C++"
namespaces = ["mozilla", "intl", "ffi"]
[parse]
parse_deps = true
include = ["oxilangtag"]

Просмотреть файл

@ -0,0 +1,77 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
use nsstring::nsACString;
use oxilangtag::LanguageTag;
/// Opaque handle type for the FFI interface.
///
/// Pointers to `LangTag` produced by `lang_tag_new` really point at a boxed
/// `LanguageTag<&str>`; the other functions in this file cast the pointer back to
/// that type before using it.
pub struct LangTag;
/// Creates a `LangTag` by parsing `tag` as a BCP47 language tag.
///
/// On success the result is a heap-allocated object owned by the caller, which must
/// release it with `lang_tag_destroy`. The parsed tag refers to the contents of `tag`
/// rather than copying them, so the string `tag` must outlive the `LangTag`.
///
/// Returns null if `tag` is not valid UTF-8 or is not a well-formed BCP47 tag.
#[no_mangle]
pub extern "C" fn lang_tag_new(tag: &nsACString) -> *mut LangTag {
    let parsed = core::str::from_utf8(tag.as_ref())
        .ok()
        .and_then(|text| LanguageTag::parse(text).ok());
    match parsed {
        // Ownership of the boxed tag passes to the caller as an opaque pointer.
        Some(language_tag) => Box::into_raw(Box::new(language_tag)) as *mut LangTag,
        None => std::ptr::null_mut(),
    }
}
/// Releases a `LangTag` instance.
///
/// Passing a null pointer is allowed and does nothing.
#[no_mangle]
pub extern "C" fn lang_tag_destroy(lang: *mut LangTag) {
    if !lang.is_null() {
        // SAFETY: relies on the caller passing a pointer that came from `lang_tag_new`
        // (i.e. a boxed `LanguageTag<&str>`) and has not been destroyed already.
        drop(unsafe { Box::from_raw(lang as *mut LanguageTag<&str>) });
    }
}
/// Matches an HTML language attribute against a CSS :lang() selector.
///
/// The attribute is a normal language tag; the selector is a language range,
/// with `und` representing a "wildcard" primary language.
/// (Based on LanguageTag::matches from the rust-language-tags crate,
/// adapted to this specific use case.)
///
/// Matching rules, all comparisons being ASCII case-insensitive:
/// * the primary languages must be equal, unless the selector's is `und`;
/// * script, region and private-use subtags must be equal whenever the selector
///   specifies them; subtags the selector omits are not constrained;
/// * every variant and extended-language subtag of the selector must occur in the
///   attribute, in the same relative order. The attribute may carry additional
///   subtags, but a selector subtag that the attribute lacks makes the match fail.
///
/// Both pointers must either be null or have been returned by `lang_tag_new` and not
/// yet passed to `lang_tag_destroy`. Returns false if either pointer is null.
#[no_mangle]
pub extern "C" fn lang_tag_matches(attribute: *const LangTag, selector: *const LangTag) -> bool {
    // `lang_tag_new` returns null for tags that fail to parse; such a tag matches nothing.
    if attribute.is_null() || selector.is_null() {
        return false;
    }

    // SAFETY: both pointers are non-null, and per the contract above they point at live
    // `LanguageTag<&str>` values created by `lang_tag_new`. We only borrow them.
    let lang = unsafe { &*(attribute as *const LanguageTag<&str>) };
    let range = unsafe { &*(selector as *const LanguageTag<&str>) };

    /// An optional subtag matches if the range does not specify it, or if both
    /// sides specify the same value.
    fn matches_option(a: Option<&str>, b: Option<&str>) -> bool {
        match (a, b) {
            (Some(a), Some(b)) => a.eq_ignore_ascii_case(b),
            (_, None) => true,
            (None, _) => false,
        }
    }

    /// Returns true if every subtag of the range (`b`) appears among the subtags of
    /// the tag (`a`), in the same relative order.
    ///
    /// A plain `zip` would stop at the shorter sequence and so silently ignore range
    /// subtags the tag does not have at all (e.g. range `de-DE-1996` vs. tag `de-DE`).
    fn matches_iter<'a>(
        mut a: impl Iterator<Item = &'a str>,
        mut b: impl Iterator<Item = &'a str>,
    ) -> bool {
        // `any` consumes `a` up to and including the first hit, so each later range
        // subtag is only looked for after the position of the previous match.
        b.all(|wanted| a.any(|have| have.eq_ignore_ascii_case(wanted)))
    }

    let range_language = range.primary_language();
    let language_matches = range_language.eq_ignore_ascii_case("und")
        || lang.primary_language().eq_ignore_ascii_case(range_language);
    if !language_matches {
        return false;
    }

    matches_option(lang.script(), range.script())
        && matches_option(lang.region(), range.region())
        && matches_iter(lang.variant_subtags(), range.variant_subtags())
        && matches_iter(
            lang.extended_language_subtags(),
            range.extended_language_subtags(),
        )
        && matches_option(lang.private_use(), range.private_use())
}

Просмотреть файл

@ -10,6 +10,7 @@
#include "mozilla/dom/Document.h"
#include "mozilla/ExpandedPrincipal.h"
#include "mozilla/intl/MozLocaleBindings.h"
#include "mozilla/intl/oxilangtag_ffi_generated.h"
#include "mozilla/TextUtils.h"
#include "nsIContent.h"
#include "nsCSSProps.h"
@ -55,109 +56,73 @@ bool nsStyleUtil::DashMatchCompare(const nsAString& aAttributeValue,
bool nsStyleUtil::LangTagCompare(const nsACString& aAttributeValue,
const nsACString& aSelectorValue) {
class AutoLangId {
public:
AutoLangId() = delete;
AutoLangId(const AutoLangId& aOther) = delete;
explicit AutoLangId(const nsACString& aLangTag) : mIsValid(false) {
mLangId = intl::ffi::unic_langid_new(&aLangTag, &mIsValid);
}
~AutoLangId() { intl::ffi::unic_langid_destroy(mLangId); }
operator intl::ffi::LanguageIdentifier*() const { return mLangId; }
bool IsValid() const { return mIsValid; }
void Reset(const nsACString& aLangTag) {
intl::ffi::unic_langid_destroy(mLangId);
mLangId = intl::ffi::unic_langid_new(&aLangTag, &mIsValid);
}
private:
intl::ffi::LanguageIdentifier* mLangId;
bool mIsValid;
};
if (aAttributeValue.IsEmpty() || aSelectorValue.IsEmpty()) {
return false;
}
int32_t attrPriv = -1;
AutoLangId attrLangId(aAttributeValue);
if (!attrLangId.IsValid()) {
// If it was invalid due to private subtags, try stripping them and
// re-parsing what remains.
attrPriv = aAttributeValue.LowerCaseFindASCII("-x-");
if (attrPriv >= 0) {
nsAutoCString temp(aAttributeValue);
temp.Truncate(attrPriv);
attrLangId.Reset(temp);
if (!attrLangId.IsValid()) {
return false;
class MOZ_RAII AutoLangTag final {
public:
AutoLangTag() = delete;
AutoLangTag(const AutoLangTag& aOther) = delete;
explicit AutoLangTag(const nsACString& aLangTag) {
mLangTag = intl::ffi::lang_tag_new(&aLangTag);
}
~AutoLangTag() {
if (mLangTag) {
intl::ffi::lang_tag_destroy(mLangTag);
}
} else {
return false;
}
bool IsValid() const { return mLangTag; }
operator intl::ffi::LangTag*() const { return mLangTag; }
void Reset(const nsACString& aLangTag) {
if (mLangTag) {
intl::ffi::lang_tag_destroy(mLangTag);
}
mLangTag = intl::ffi::lang_tag_new(&aLangTag);
}
private:
intl::ffi::LangTag* mLangTag = nullptr;
};
AutoLangTag langAttr(aAttributeValue);
// Non-BCP47 extension: recognize '_' as an alternative subtag delimiter.
nsAutoCString attrTemp;
if (!langAttr.IsValid()) {
if (aAttributeValue.Contains('_')) {
attrTemp = aAttributeValue;
attrTemp.ReplaceChar('_', '-');
langAttr.Reset(attrTemp);
}
}
int32_t selPriv = -1;
AutoLangId selectorId(aSelectorValue);
if (!selectorId.IsValid()) {
// If it was "invalid" because of a wildcard language subtag, replace that
// with 'und' and try again.
// XXX Should unic_langid_new handle the wildcard internally?
bool wildcard = false;
if (aSelectorValue[0] == '*') {
wildcard = true;
nsAutoCString temp(aSelectorValue);
temp.Replace(0, 1, "und");
selectorId.Reset(temp);
if (selectorId.IsValid()) {
intl::ffi::unic_langid_clear_language(selectorId);
}
}
// If it was invalid due to private subtags, try stripping them.
if (!selectorId.IsValid()) {
selPriv = aSelectorValue.LowerCaseFindASCII("-x-");
if (selPriv >= 0) {
nsAutoCString temp(aSelectorValue);
temp.Truncate(selPriv);
// Also do the wildcard replacement if necessary.
if (wildcard) {
temp.Replace(0, 1, "und");
}
selectorId.Reset(temp);
if (!selectorId.IsValid()) {
return false;
}
if (wildcard) {
intl::ffi::unic_langid_clear_language(selectorId);
}
} else {
return false;
}
}
}
if (!intl::ffi::unic_langid_matches(attrLangId, selectorId,
/* match addrLangId as range */ false,
/* match selectorId as range */ true)) {
if (!langAttr.IsValid()) {
return false;
}
// If the selector included private subtags, we also require them to match.
// However, if the attribute has private subtags but the selector doesn't,
// they are ignored; the selector still matches the (non-private) subtags in
// the attribute.
if (selPriv >= 0) {
if (attrPriv < 0) {
return false;
AutoLangTag selector(aSelectorValue);
// If `selector` was invalid because of a wildcard language subtag (and/or
// an underscore), replace wildcard with 'und' (and underscore with hyphen)
// and try again.
nsAutoCString selectorTemp;
if (!selector.IsValid()) {
bool wildcard = aSelectorValue[0] == '*';
if (wildcard || aSelectorValue.Contains('_')) {
selectorTemp = aSelectorValue;
selectorTemp.ReplaceChar('_', '-');
if (wildcard) {
selectorTemp.Replace(0, 1, "und");
}
selector.Reset(selectorTemp);
}
return Substring(aAttributeValue, attrPriv)
.EqualsIgnoreCase(Substring(aSelectorValue, selPriv));
}
return true;
return selector.IsValid() && intl::ffi::lang_tag_matches(langAttr, selector);
}
bool nsStyleUtil::ValueIncludes(const nsAString& aValueList,

Просмотреть файл

@ -57,6 +57,7 @@ unic-langid-ffi = { path = "../../../../intl/locale/rust/unic-langid-ffi" }
fluent-langneg = { version = "0.13", features = ["cldr"] }
fluent-langneg-ffi = { path = "../../../../intl/locale/rust/fluent-langneg-ffi" }
oxilangtag = "0.1.3"
oxilangtag-ffi = { path = "../../../../intl/locale/rust/oxilangtag-ffi" }
rure = "0.2.2"
rust_minidump_writer_linux = { path = "../../../crashreporter/rust_minidump_writer_linux", optional = true }
mozannotation_client = { path = "../../../crashreporter/mozannotation_client", optional = true }

Просмотреть файл

@ -87,6 +87,8 @@ extern crate fluent_langneg_ffi;
extern crate fluent;
extern crate fluent_ffi;
extern crate oxilangtag_ffi;
extern crate rure;
extern crate fluent_fallback;