From d5b9be12ce66ab95be0d44988cc159a5c3a7ac6d Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Mon, 15 Jun 2020 15:32:21 +0000 Subject: [PATCH] Bug 1603712 - Remove intl.charset.detector.ng.enabled pref and resulting dead code. r=Gijs,fluent-reviewers,valentin,m_kato Differential Revision: https://phabricator.services.mozilla.com/D79101 --- .../customizableui/CustomizableWidgets.jsm | 60 +---- .../customizableui/content/panelUI.inc.xhtml | 6 - .../browser_967000_button_charEncoding.js | 13 +- .../components/preferences/dialogs/fonts.js | 5 - .../preferences/dialogs/fonts.xhtml | 34 --- browser/components/preferences/main.inc.xhtml | 19 -- browser/components/preferences/main.js | 9 - .../en-US/browser/preferences/fonts.ftl | 44 ---- build/pgo/server-locations.txt | 2 + build/win32/orderfile.txt | 2 - build/win64/orderfile.txt | 2 - dom/base/test/test_bug466409.html | 15 +- dom/encoding/FallbackEncoding.cpp | 171 ------------- dom/encoding/FallbackEncoding.h | 92 ------- dom/encoding/domainsfallbacks.properties | 167 ------------- dom/encoding/encodings2arrays.py | 26 -- dom/encoding/labelsencodings.properties | 226 ------------------ dom/encoding/localesfallbacks.properties | 72 ------ dom/encoding/moz.build | 19 -- .../nonparticipatingdomains.properties | 51 ---- dom/encoding/test/file_in_lk_TLD.html | 9 + dom/encoding/test/mochitest.ini | 2 + dom/encoding/test/test_in_lk_TLD.html | 55 +++++ dom/html/nsHTMLDocument.cpp | 73 +----- .../tests/CharsetDetectionTests.js | 45 +--- layout/build/nsLayoutStatics.cpp | 4 - modules/libpref/init/StaticPrefList.yaml | 19 -- modules/libpref/init/all.js | 2 - .../converters/nsDirIndexParser.cpp | 63 ++++- parser/html/nsHtml5StreamParser.cpp | 59 ++--- .../chrome/global/charsetMenu.properties | 12 - .../en-US/chrome/global/intl.properties | 8 - toolkit/modules/CharsetMenu.jsm | 85 +------ tools/lint/file-whitespace.yml | 1 - 34 files changed, 174 insertions(+), 1298 deletions(-) delete mode 100644 dom/encoding/FallbackEncoding.cpp delete mode 100644 dom/encoding/FallbackEncoding.h delete mode 100644 dom/encoding/domainsfallbacks.properties delete mode 100644 dom/encoding/encodings2arrays.py delete mode 100644 dom/encoding/labelsencodings.properties delete mode 100644 dom/encoding/localesfallbacks.properties delete mode 100644 dom/encoding/nonparticipatingdomains.properties create mode 100644 dom/encoding/test/file_in_lk_TLD.html create mode 100644 dom/encoding/test/test_in_lk_TLD.html diff --git a/browser/components/customizableui/CustomizableWidgets.jsm b/browser/components/customizableui/CustomizableWidgets.jsm index 7d6274ab1cdf..b0b1ea9cf3e4 100644 --- a/browser/components/customizableui/CustomizableWidgets.jsm +++ b/browser/components/customizableui/CustomizableWidgets.jsm @@ -28,11 +28,6 @@ XPCOMUtils.defineLazyModuleGetters(this, { SyncedTabs: "resource://services-sync/SyncedTabs.jsm", }); -XPCOMUtils.defineLazyGetter(this, "CharsetBundle", function() { - const kCharsetBundle = "chrome://global/locale/charsetMenu.properties"; - return Services.strings.createBundle(kCharsetBundle); -}); - const kPrefCustomizationDebug = "browser.uiCustomization.debug"; XPCOMUtils.defineLazyGetter(this, "log", () => { @@ -439,29 +434,6 @@ const CustomizableWidgets = [ this._updateElements(elements, currentCharset); }, - updateCurrentDetector(aDocument) { - let detectorContainer = aDocument.getElementById( - "PanelUI-characterEncodingView-autodetect" - ); - let currentDetector; - try { - currentDetector = Services.prefs.getComplexValue( - "intl.charset.detector", - Ci.nsIPrefLocalizedString - ).data; - } catch (e) {} - - this._updateElements(detectorContainer.children, currentDetector); - let hideDetector = Services.prefs.getBoolPref( - "intl.charset.detector.ng.enabled" - ); - aDocument.getElementById( - "PanelUI-characterEncodingView-autodetect-container" - ).hidden = hideDetector; - aDocument.getElementById( - "PanelUI-characterEncodingView-autodetect-separator" - ).hidden = hideDetector; - }, _updateElements(aElements, aCurrentItem) { if (!aElements.length) { return; @@ -486,11 +458,10 @@ const CustomizableWidgets = [ } let document = aEvent.target.ownerDocument; - let autoDetectLabelId = "PanelUI-characterEncodingView-autodetect-label"; - let autoDetectLabel = document.getElementById(autoDetectLabelId); - if (!autoDetectLabel.hasAttribute("value")) { - let label = CharsetBundle.GetStringFromName("charsetMenuAutodet"); - autoDetectLabel.setAttribute("value", label); + if ( + !document.getElementById("PanelUI-characterEncodingView-pinned") + .firstChild + ) { this.populateList( document, "PanelUI-characterEncodingView-pinned", @@ -501,13 +472,8 @@ const CustomizableWidgets = [ "PanelUI-characterEncodingView-charsets", "otherCharsets" ); - this.populateList( - document, - "PanelUI-characterEncodingView-autodetect", - "detectors" - ); } - this.updateCurrentDetector(document); + this.updateCurrentCharset(document); }, onCommand(aEvent) { @@ -517,23 +483,9 @@ const CustomizableWidgets = [ } let window = node.ownerGlobal; - let section = node.section; let value = node.value; - // The behavior as implemented here is directly based off of the - // `MultiplexHandler()` method in browser.js. - if (section != "detectors") { - window.BrowserSetForcedCharacterSet(value); - } else { - // Set the detector pref. - try { - Services.prefs.setStringPref("intl.charset.detector", value); - } catch (e) { - Cu.reportError("Failed to set the intl.charset.detector preference."); - } - // Prepare a browser page reload with a changed charset. - window.BrowserCharsetReload(); - } + window.BrowserSetForcedCharacterSet(value); }, onCreated(aNode) { let document = aNode.ownerDocument; diff --git a/browser/components/customizableui/content/panelUI.inc.xhtml b/browser/components/customizableui/content/panelUI.inc.xhtml index 8a3339ff07be..cd565bfbaf94 100644 --- a/browser/components/customizableui/content/panelUI.inc.xhtml +++ b/browser/components/customizableui/content/panelUI.inc.xhtml @@ -755,12 +755,6 @@ - - - diff --git a/browser/components/customizableui/test/browser_967000_button_charEncoding.js b/browser/components/customizableui/test/browser_967000_button_charEncoding.js index d62a84a11246..d6060792918b 100644 --- a/browser/components/customizableui/test/browser_967000_button_charEncoding.js +++ b/browser/components/customizableui/test/browser_967000_button_charEncoding.js @@ -74,23 +74,12 @@ add_task(async function() { let checkedButtons = characterEncodingView.querySelectorAll( "toolbarbutton[checked='true']" ); - is( - checkedButtons.length, - 2, - "There should be 2 checked items (1 charset, 1 detector)." - ); + is(checkedButtons.length, 1, "There should be 1 checked item."); is( checkedButtons[0].getAttribute("label"), "Western", "The western encoding is correctly selected" ); - is( - characterEncodingView.querySelectorAll( - "#PanelUI-characterEncodingView-autodetect toolbarbutton[checked='true']" - ).length, - 1, - "There should be 1 checked detector." - ); panelHidePromise = promiseOverflowHidden(window); document.getElementById("nav-bar").overflowable._panel.hidePopup(); diff --git a/browser/components/preferences/dialogs/fonts.js b/browser/components/preferences/dialogs/fonts.js index e293f48226d4..e112d4321532 100644 --- a/browser/components/preferences/dialogs/fonts.js +++ b/browser/components/preferences/dialogs/fonts.js @@ -27,13 +27,8 @@ window.addEventListener("load", () => gFontsDialog.onLoad()); Preferences.addAll([ { id: "font.language.group", type: "wstring" }, { id: "browser.display.use_document_fonts", type: "int" }, - { id: "intl.charset.fallback.override", type: "string" }, ]); -document.getElementById("FallbackGroupbox").hidden = Services.prefs.getBoolPref( - "intl.charset.detector.ng.enabled" -); - var gFontsDialog = { _selectLanguageGroupPromise: Promise.resolve(), diff --git a/browser/components/preferences/dialogs/fonts.xhtml b/browser/components/preferences/dialogs/fonts.xhtml index 862bbf227d9f..25743fde3b95 100644 --- a/browser/components/preferences/dialogs/fonts.xhtml +++ b/browser/components/preferences/dialogs/fonts.xhtml @@ -233,40 +233,6 @@ - - - - - - - - diff --git a/dom/encoding/FallbackEncoding.cpp b/dom/encoding/FallbackEncoding.cpp deleted file mode 100644 index 5af02c1d40f8..000000000000 --- a/dom/encoding/FallbackEncoding.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* vim: set ts=8 sts=2 et sw=2 tw=80: */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "mozilla/dom/FallbackEncoding.h" - -#include "mozilla/ArrayUtils.h" -#include "mozilla/Encoding.h" -#include "mozilla/intl/LocaleService.h" -#include "mozilla/Preferences.h" -#include "mozilla/Services.h" -#include "nsIObserverService.h" -#include "nsUConvPropertySearch.h" - -using mozilla::intl::LocaleService; - -namespace mozilla { -namespace dom { - -struct EncodingProp { - const char* const mKey; - NotNull mValue; -}; - -template -static NotNull SearchEncodingProp( - const EncodingProp (&aProperties)[N], const nsACString& aKey) { - const nsCString& flat = PromiseFlatCString(aKey); - size_t index; - if (!BinarySearchIf( - aProperties, 0, ArrayLength(aProperties), - [&flat](const EncodingProp& aProperty) { - return flat.Compare(aProperty.mKey); - }, - &index)) { - return WINDOWS_1252_ENCODING; - } - return aProperties[index].mValue; -} - -static const EncodingProp localesFallbacks[] = { -#include "localesfallbacks.properties.h" -}; - -static const EncodingProp domainsFallbacks[] = { -#include "domainsfallbacks.properties.h" -}; - -static constexpr nsUConvProp nonParticipatingDomains[] = { -#include "nonparticipatingdomains.properties.h" -}; - -NS_IMPL_ISUPPORTS(FallbackEncoding, nsIObserver) - -StaticRefPtr FallbackEncoding::sInstance; - -FallbackEncoding::FallbackEncoding() : mFallback(nullptr) { - MOZ_ASSERT(!FallbackEncoding::sInstance, "Singleton already exists."); -} - -NotNull FallbackEncoding::Get() { - if (mFallback) { - return WrapNotNull(mFallback); - } - - nsAutoCString override; - Preferences::GetCString("intl.charset.fallback.override", override); - // Don't let the user break things by setting the override to unreasonable - // values via about:config - auto encoding = Encoding::ForLabel(override); - if (!encoding || !encoding->IsAsciiCompatible() || - encoding == UTF_8_ENCODING) { - mFallback = nullptr; - } else { - mFallback = encoding; - } - - if (mFallback) { - return WrapNotNull(mFallback); - } - - nsAutoCString locale; - LocaleService::GetInstance()->GetAppLocaleAsBCP47(locale); - - // Let's lower case the string just in case unofficial language packs - // don't stick to conventions. - ToLowerCase(locale); // ASCII lowercasing with CString input! - - // Special case Traditional Chinese before throwing away stuff after the - // language itself. Today we only ship zh-TW, but be defensive about - // possible future values. - if (locale.EqualsLiteral("zh-tw") || locale.EqualsLiteral("zh-hk") || - locale.EqualsLiteral("zh-mo") || locale.EqualsLiteral("zh-hant")) { - mFallback = BIG5_ENCODING; - return WrapNotNull(mFallback); - } - - // Throw away regions and other variants to accommodate weird stuff seen - // in telemetry--apparently unofficial language packs. - int32_t index = locale.FindChar('-'); - if (index >= 0) { - locale.Truncate(index); - } - - auto fallback = SearchEncodingProp(localesFallbacks, locale); - mFallback = fallback; - - return fallback; -} - -NotNull FallbackEncoding::FromLocale() { - MOZ_ASSERT(FallbackEncoding::sInstance, - "Using uninitialized fallback cache."); - return FallbackEncoding::sInstance->Get(); -} - -// PrefChangedFunc -void FallbackEncoding::PrefChanged(const char*, void*) { - MOZ_ASSERT(FallbackEncoding::sInstance, - "Pref callback called with null fallback cache."); - FallbackEncoding::sInstance->Invalidate(); -} - -NS_IMETHODIMP -FallbackEncoding::Observe(nsISupports* aSubject, const char* aTopic, - const char16_t* aData) { - MOZ_ASSERT(FallbackEncoding::sInstance, - "Observe callback called with null fallback cache."); - FallbackEncoding::sInstance->Invalidate(); - return NS_OK; -} - -void FallbackEncoding::Initialize() { - MOZ_ASSERT(!FallbackEncoding::sInstance, - "Initializing pre-existing fallback cache."); - FallbackEncoding::sInstance = new FallbackEncoding; - Preferences::RegisterCallback(FallbackEncoding::PrefChanged, - "intl.charset.fallback.override"); - - nsCOMPtr obs = mozilla::services::GetObserverService(); - if (obs) { - obs->AddObserver(sInstance, "intl:requested-locales-changed", true); - } -} - -void FallbackEncoding::Shutdown() { - MOZ_ASSERT(FallbackEncoding::sInstance, - "Releasing non-existent fallback cache."); - nsCOMPtr obs = mozilla::services::GetObserverService(); - if (obs) { - obs->RemoveObserver(sInstance, "intl:requested-locales-changed"); - } - FallbackEncoding::sInstance = nullptr; -} - -bool FallbackEncoding::IsParticipatingTopLevelDomain(const nsACString& aTLD) { - nsAutoCString dummy; - return NS_FAILED(nsUConvPropertySearch::SearchPropertyValue( - nonParticipatingDomains, ArrayLength(nonParticipatingDomains), aTLD, - dummy)); -} - -NotNull FallbackEncoding::FromTopLevelDomain( - const nsACString& aTLD) { - return SearchEncodingProp(domainsFallbacks, aTLD); -} - -} // namespace dom -} // namespace mozilla diff --git a/dom/encoding/FallbackEncoding.h b/dom/encoding/FallbackEncoding.h deleted file mode 100644 index ab69a00d9d41..000000000000 --- a/dom/encoding/FallbackEncoding.h +++ /dev/null @@ -1,92 +0,0 @@ -/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* vim: set ts=8 sts=2 et sw=2 tw=80: */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this file, - * You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef mozilla_dom_FallbackEncoding_h_ -#define mozilla_dom_FallbackEncoding_h_ - -#include "mozilla/NotNull.h" -#include "mozilla/StaticPtr.h" -#include "nsIObserver.h" -#include "nsString.h" -#include "nsWeakReference.h" - -namespace mozilla { -class Encoding; -namespace dom { - -class FallbackEncoding : public nsIObserver, nsSupportsWeakReference { - public: - NS_DECL_ISUPPORTS - NS_DECL_NSIOBSERVER - - /** - * Gets the locale-dependent fallback encoding for legacy HTML and plain - * text content. - * - * @param aFallback the outparam for the fallback encoding - */ - static NotNull FromLocale(); - - /** - * Checks if it is appropriate to call FromTopLevelDomain() for a given TLD. - * - * @param aTLD the top-level domain (in Punycode) - * @return true if OK to call FromTopLevelDomain() - */ - static bool IsParticipatingTopLevelDomain(const nsACString& aTLD); - - /** - * Gets a top-level domain-depedendent fallback encoding for legacy HTML - * and plain text content - * - * @param aTLD the top-level domain (in Punycode) - * @param aFallback the outparam for the fallback encoding - */ - static NotNull FromTopLevelDomain(const nsACString& aTLD); - - // public API ends here! - - /** - * Allocate sInstance used by FromLocale(). - * To be called from nsLayoutStatics only. - */ - static void Initialize(); - - /** - * Delete sInstance used by FromLocale(). - * To be called from nsLayoutStatics only. - */ - static void Shutdown(); - - private: - /** - * The fallback cache. - */ - static StaticRefPtr sInstance; - - FallbackEncoding(); - virtual ~FallbackEncoding() = default; - - /** - * Invalidates the cache. - */ - void Invalidate() { mFallback = nullptr; } - - static void PrefChanged(const char*, void*); - - /** - * Gets the fallback encoding label. - * @param aFallback the fallback encoding - */ - NotNull Get(); - - const Encoding* mFallback; -}; - -} // namespace dom -} // namespace mozilla - -#endif // mozilla_dom_FallbackEncoding_h_ diff --git a/dom/encoding/domainsfallbacks.properties b/dom/encoding/domainsfallbacks.properties deleted file mode 100644 index cd77e04e6ff6..000000000000 --- a/dom/encoding/domainsfallbacks.properties +++ /dev/null @@ -1,167 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -# This file contains educated guesses about which top-level domains are -# likely to host legacy content that assumes a non-windows-1252 encoding. -# Punycode TLDs are included on the theory that legacy content might appear -# behind those relatively new TLDs if DNS just points to a legacy server. -# -# Encodings for which a confident-enough educated guess is missing are -# listed in nonparticipatingdomains.properties. Domains that are listed -# neither there nor here get windows-1252 as the associated fallback. -# -# The list below includes Arabic-script TLDs not on IANA list but on the -# ICANN list: -# http://www.icann.org/en/resources/idn/fast-track/string-evaluation-completion -# Otherwise, the list includes non-windows-1252-affilited country TLDs from -# https://data.iana.org/TLD/tlds-alpha-by-domain.txt -# -# The guesses are assigned as follows: -# * If the country has a dominant country-affiliated language and that language -# is part of the languages to fallbacks mapping, use the encoding for that -# language from that mapping. -# * Use windows-1256 for countries that have a dominant Arabic-script -# language or whose all languages are Arabic-script languages. -# * Use windows-1251 likewise but for Cyrillic script. - -ae=windows-1256 -xn--mgbaam7a8h=windows-1256 - -af=windows-1256 - -bg=windows-1251 - -bh=windows-1256 - -by=windows-1251 - -cn=GBK -xn--fiqs8s=GBK -# Assume that Traditional Chinese TLD is meant to work if URL input happens to -# be in the traditional mode. Expect content to be simplified anyway. -xn--fiqz9s=GBK - -cz=windows-1250 - -dz=windows-1256 -xn--lgbbat1ad8j=windows-1256 - -ee=windows-1257 - -eg=windows-1256 -xn--wgbh1c=windows-1256 - -gr=ISO-8859-7 - -hk=Big5 -xn--j6w193g=Big5 - -hr=windows-1250 - -hu=ISO-8859-2 - -iq=windows-1256 - -ir=windows-1256 -xn--mgba3a4f16a=windows-1256 - -jo=windows-1256 -xn--mgbayh7gpa=windows-1256 - -jp=Shift_JIS - -kg=windows-1251 - -kp=EUC-KR - -kr=EUC-KR -xn--3e0b707e=EUC-KR - -kw=windows-1256 - -kz=windows-1251 -xn--80ao21a=windows-1251 - -lb=windows-1256 - -lt=windows-1257 - -lv=windows-1257 - -ma=windows-1256 -xn--mgbc0a9azcg=windows-1256 - -mk=windows-1251 - -mn=windows-1251 -xn--l1acc=windows-1251 - -mo=Big5 - -# my -xn--mgbx4cd0ab=windows-1256 - -om=windows-1256 -xn--mgb9awbf=windows-1256 - -#pk -xn--mgbai9azgqp6j=windows-1256 - -pl=ISO-8859-2 - -ps=windows-1256 -xn--ygbi2ammx=windows-1256 - -qa=windows-1256 -xn--wgbl6a=windows-1256 - -rs=windows-1251 -xn--90a3ac=windows-1251 - -ru=windows-1251 -xn--p1ai=windows-1251 - -sa=windows-1256 -xn--mgberp4a5d4ar=windows-1256 - -sd=windows-1256 -xn--mgbpl2fh=windows-1256 - -sg=GBK -xn--yfro4i67o=GBK - -si=ISO-8859-2 - -sk=windows-1250 - -su=windows-1251 - -sy=windows-1256 -xn--mgbtf8fl=windows-1256 - -th=windows-874 -xn--o3cw4h=windows-874 - -tj=windows-1251 - -tn=windows-1256 -xn--pgbs0dh=windows-1256 - -tr=windows-1254 - -tw=Big5 -# Assume that the Simplified Chinese TLD is meant to work when URL input -# happens in the simplified mode. Assume content is tradition anyway. -xn--kprw13d=Big5 -xn--kpry57d=Big5 - -ua=windows-1251 -xn--j1amh=windows-1251 - -uz=windows-1251 - -vn=windows-1258 - -ye=windows-1256 -xn--mgb2ddes=windows-1256 diff --git a/dom/encoding/encodings2arrays.py b/dom/encoding/encodings2arrays.py deleted file mode 100644 index 1b0351f4f1e6..000000000000 --- a/dom/encoding/encodings2arrays.py +++ /dev/null @@ -1,26 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -import sys - -def main(header, propFile): - mappings = {} - - with open(propFile, 'r') as f: - for line in f: - line = line.strip() - if not line.startswith('#'): - parts = line.split("=", 1) - if len(parts) == 2 and len(parts[0]) > 0: - mappings[parts[0].strip()] = parts[1].strip() - - keys = mappings.keys() - - header.write("// This is a generated file. Please do not edit.\n") - header.write("// Please edit the corresponding .properties file instead.\n") - - entries = ['{ "%s", %s }' - % (key, mappings[key].replace('-', '_').upper() + '_ENCODING') for key in sorted(keys)] - header.write(',\n'.join(entries) + '\n') - diff --git a/dom/encoding/labelsencodings.properties b/dom/encoding/labelsencodings.properties deleted file mode 100644 index 9bdee501ae74..000000000000 --- a/dom/encoding/labelsencodings.properties +++ /dev/null @@ -1,226 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# -# The list is taken from http://encoding.spec.whatwg.org/#encodings -# The encoding name may not be lowercased due to compatibility with -# our internal implementations. -# - -unicode-1-1-utf-8=UTF-8 -utf-8=UTF-8 -utf8=UTF-8 -866=IBM866 -cp866=IBM866 -csibm866=IBM866 -ibm866=IBM866 -csisolatin2=ISO-8859-2 -iso-8859-2=ISO-8859-2 -iso-ir-101=ISO-8859-2 -iso8859-2=ISO-8859-2 -iso88592=ISO-8859-2 -iso_8859-2=ISO-8859-2 -iso_8859-2:1987=ISO-8859-2 -l2=ISO-8859-2 -latin2=ISO-8859-2 -csisolatin3=ISO-8859-3 -iso-8859-3=ISO-8859-3 -iso-ir-109=ISO-8859-3 -iso8859-3=ISO-8859-3 -iso88593=ISO-8859-3 -iso_8859-3=ISO-8859-3 -iso_8859-3:1988=ISO-8859-3 -l3=ISO-8859-3 -latin3=ISO-8859-3 -csisolatin4=ISO-8859-4 -iso-8859-4=ISO-8859-4 -iso-ir-110=ISO-8859-4 -iso8859-4=ISO-8859-4 -iso88594=ISO-8859-4 -iso_8859-4=ISO-8859-4 -iso_8859-4:1988=ISO-8859-4 -l4=ISO-8859-4 -latin4=ISO-8859-4 -csisolatincyrillic=ISO-8859-5 -cyrillic=ISO-8859-5 -iso-8859-5=ISO-8859-5 -iso-ir-144=ISO-8859-5 -iso8859-5=ISO-8859-5 -iso88595=ISO-8859-5 -iso_8859-5=ISO-8859-5 -iso_8859-5:1988=ISO-8859-5 -arabic=ISO-8859-6 -asmo-708=ISO-8859-6 -csiso88596e=ISO-8859-6 -csiso88596i=ISO-8859-6 -csisolatinarabic=ISO-8859-6 -ecma-114=ISO-8859-6 -iso-8859-6=ISO-8859-6 -iso-8859-6-e=ISO-8859-6 -iso-8859-6-i=ISO-8859-6 -iso-ir-127=ISO-8859-6 -iso8859-6=ISO-8859-6 -iso88596=ISO-8859-6 -iso_8859-6=ISO-8859-6 -iso_8859-6:1987=ISO-8859-6 -csisolatingreek=ISO-8859-7 -ecma-118=ISO-8859-7 -elot_928=ISO-8859-7 -greek=ISO-8859-7 -greek8=ISO-8859-7 -iso-8859-7=ISO-8859-7 -iso-ir-126=ISO-8859-7 -iso8859-7=ISO-8859-7 -iso88597=ISO-8859-7 -iso_8859-7=ISO-8859-7 -iso_8859-7:1987=ISO-8859-7 -sun_eu_greek=ISO-8859-7 -csiso88598e=ISO-8859-8 -csisolatinhebrew=ISO-8859-8 -hebrew=ISO-8859-8 -iso-8859-8=ISO-8859-8 -iso-8859-8-e=ISO-8859-8 -iso-ir-138=ISO-8859-8 -iso8859-8=ISO-8859-8 -iso88598=ISO-8859-8 -iso_8859-8=ISO-8859-8 -iso_8859-8:1988=ISO-8859-8 -visual=ISO-8859-8 -csiso88598i=ISO-8859-8-I -iso-8859-8-i=ISO-8859-8-I -logical=ISO-8859-8-I -csisolatin6=ISO-8859-10 -iso-8859-10=ISO-8859-10 -iso-ir-157=ISO-8859-10 -iso8859-10=ISO-8859-10 -iso885910=ISO-8859-10 -l6=ISO-8859-10 -latin6=ISO-8859-10 -iso-8859-13=ISO-8859-13 -iso8859-13=ISO-8859-13 -iso885913=ISO-8859-13 -iso-8859-14=ISO-8859-14 -iso8859-14=ISO-8859-14 -iso885914=ISO-8859-14 -csisolatin9=ISO-8859-15 -iso-8859-15=ISO-8859-15 -iso8859-15=ISO-8859-15 -iso885915=ISO-8859-15 -iso_8859-15=ISO-8859-15 -l9=ISO-8859-15 -iso-8859-16=ISO-8859-16 -cskoi8r=KOI8-R -koi=KOI8-R -koi8=KOI8-R -koi8-r=KOI8-R -koi8_r=KOI8-R -koi8-u=KOI8-U -csmacintosh=macintosh -mac=macintosh -macintosh=macintosh -x-mac-roman=macintosh -dos-874=windows-874 -iso-8859-11=windows-874 -iso8859-11=windows-874 -iso885911=windows-874 -tis-620=windows-874 -windows-874=windows-874 -cp1250=windows-1250 -windows-1250=windows-1250 -x-cp1250=windows-1250 -cp1251=windows-1251 -windows-1251=windows-1251 -x-cp1251=windows-1251 -ansi_x3.4-1968=windows-1252 -ascii=windows-1252 -cp1252=windows-1252 -cp819=windows-1252 -csisolatin1=windows-1252 -ibm819=windows-1252 -iso-8859-1=windows-1252 -iso-ir-100=windows-1252 -iso8859-1=windows-1252 -iso88591=windows-1252 -iso_8859-1=windows-1252 -iso_8859-1:1987=windows-1252 -l1=windows-1252 -latin1=windows-1252 -us-ascii=windows-1252 -windows-1252=windows-1252 -x-cp1252=windows-1252 -cp1253=windows-1253 -windows-1253=windows-1253 -x-cp1253=windows-1253 -cp1254=windows-1254 -csisolatin5=windows-1254 -iso-8859-9=windows-1254 -iso-ir-148=windows-1254 -iso8859-9=windows-1254 -iso88599=windows-1254 -iso_8859-9=windows-1254 -iso_8859-9:1989=windows-1254 -l5=windows-1254 -latin5=windows-1254 -windows-1254=windows-1254 -x-cp1254=windows-1254 -cp1255=windows-1255 -windows-1255=windows-1255 -x-cp1255=windows-1255 -cp1256=windows-1256 -windows-1256=windows-1256 -x-cp1256=windows-1256 -cp1257=windows-1257 -windows-1257=windows-1257 -x-cp1257=windows-1257 -cp1258=windows-1258 -windows-1258=windows-1258 -x-cp1258=windows-1258 -x-mac-cyrillic=x-mac-cyrillic -x-mac-ukrainian=x-mac-cyrillic -chinese=gbk -csgb2312=gbk -csiso58gb231280=gbk -gb2312=gbk -gb_2312=gbk -gb_2312-80=gbk -gbk=gbk -iso-ir-58=gbk -x-gbk=gbk -gb18030=gb18030 -hz-gb-2312=replacement -big5=Big5 -big5-hkscs=Big5 -cn-big5=Big5 -csbig5=Big5 -x-x-big5=Big5 -cseucpkdfmtjapanese=EUC-JP -euc-jp=EUC-JP -x-euc-jp=EUC-JP -csiso2022jp=ISO-2022-JP -iso-2022-jp=ISO-2022-JP -csshiftjis=Shift_JIS -ms932=Shift_JIS -ms_kanji=Shift_JIS -shift-jis=Shift_JIS -shift_jis=Shift_JIS -sjis=Shift_JIS -windows-31j=Shift_JIS -x-sjis=Shift_JIS -cseuckr=EUC-KR -csksc56011987=EUC-KR -euc-kr=EUC-KR -iso-ir-149=EUC-KR -korean=EUC-KR -ks_c_5601-1987=EUC-KR -ks_c_5601-1989=EUC-KR -ksc5601=EUC-KR -ksc_5601=EUC-KR -windows-949=EUC-KR -csiso2022kr=replacement -iso-2022-kr=replacement -iso-2022-cn=replacement -iso-2022-cn-ext=replacement -utf-16=UTF-16LE -utf-16le=UTF-16LE -utf-16be=UTF-16BE -x-user-defined=x-user-defined diff --git a/dom/encoding/localesfallbacks.properties b/dom/encoding/localesfallbacks.properties deleted file mode 100644 index 47cccc5da66b..000000000000 --- a/dom/encoding/localesfallbacks.properties +++ /dev/null @@ -1,72 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -# This file contains mappings from languages to legacy encodings for languages -# that are associated with legacy encoding other than windows-1252 (except -# Traditional Chinese, which is handled as a special case elsewhere). -# -# The keys are language codes without regions. The values are Gecko-canonical -# encoding labels (not necessarily lower case!). -# -# Rules: -# -# * Avoid editing this file! -# -# * If you do edit this file, be sure to file a spec bug against WHATWG HTML -# to keep this file in sync with -# http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding -# -# * As an exception to the previous rule, GBK is used instead of GB18030 -# until/unless work on http://encoding.spec.whatwg.org/ shows that the former -# can be treated as an alias of the latter and our decoder implementation -# has been audited to match the spec. -# -# * Use only the language code without a hyphen or anything that would come -# after the hyphen. -# -# * Don't put windows-1252-affiliated languages here. -# -# * Don't put Traditional Chinese here. - -ar=windows-1256 -# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089 -ba=windows-1251 -# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089 -be=windows-1251 -bg=windows-1251 -cs=windows-1250 -# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23090 -el=ISO-8859-7 -et=windows-1257 -fa=windows-1256 -he=windows-1255 -hr=windows-1250 -hu=ISO-8859-2 -ja=Shift_JIS -# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089 -kk=windows-1251 -ko=EUC-KR -ku=windows-1254 -# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089 -ky=windows-1251 -lt=windows-1257 -lv=windows-1257 -# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089 -mk=windows-1251 -pl=ISO-8859-2 -ru=windows-1251 -# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089 -sah=windows-1251 -sk=windows-1250 -sl=ISO-8859-2 -sr=windows-1251 -# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089 -tg=windows-1251 -th=windows-874 -tr=windows-1254 -# https://www.w3.org/Bugs/Public/show_bug.cgi?id=23089 -tt=windows-1251 -uk=windows-1251 -vi=windows-1258 -zh=GBK diff --git a/dom/encoding/moz.build b/dom/encoding/moz.build index 40779816b562..e2bc8a5a829b 100644 --- a/dom/encoding/moz.build +++ b/dom/encoding/moz.build @@ -8,13 +8,11 @@ with Files("**"): BUG_COMPONENT = ("Core", "Internationalization") EXPORTS.mozilla.dom += [ - 'FallbackEncoding.h', 'TextDecoder.h', 'TextEncoder.h', ] UNIFIED_SOURCES += [ - 'FallbackEncoding.cpp', 'TextDecoder.cpp', 'TextEncoder.cpp', ] @@ -24,23 +22,6 @@ LOCAL_INCLUDES += [ '/intl/locale', ] -props2arrays = 'encodings2arrays.py' -prefixes = ( - 'domainsfallbacks', - 'labelsencodings', - 'localesfallbacks', -) - -for prefix in prefixes: - input_file = prefix + '.properties' - header = prefix + '.properties.h' - GeneratedFile(header, script=props2arrays, inputs=[input_file]) - -input_file = 'nonparticipatingdomains.properties' -header = input_file + '.h' -GeneratedFile(header, script='../../intl/locale/props2arrays.py', - inputs=[input_file]) - MOCHITEST_MANIFESTS += [ 'test/mochitest.ini', ] diff --git a/dom/encoding/nonparticipatingdomains.properties b/dom/encoding/nonparticipatingdomains.properties deleted file mode 100644 index b2e1396bf9b9..000000000000 --- a/dom/encoding/nonparticipatingdomains.properties +++ /dev/null @@ -1,51 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -# Top-level domains listed here do not participate in TLD-based guessing. -# -# We should do Web crawls to see if domains listed here can migrate to -# domainsfallbacks.properties. -# -# The value to the right of the = sign is ignored and serves as a placeholder. - -# Generic -com=windows-1252 -net=windows-1252 -org=windows-1252 - -# No Firefox localization for Azeri -az=windows-1254 - -# windows-1251 or windows-1250? -ba=??? - -# ISO-8859-7 or windows-1254? -cy=??? - -# Is there enough unlabeled windows-1256 content for a windows-1255 to break -# too much? -il=windows-1255 - -# Out-of-country English use -ly=windows-1256 - -# Out-of-country English use -# md=windows-1250 - -# Out-of-country English use -# me=windows-1251 - -# Malaysia has an Arabic-script TLD, official script is latin, possibly Chinese-script publications -my=??? - -# No Firefox localization for Urdu; potential for minority-language sites -# relying on windows-1252 hacks. -pk=windows-1256 - -# The Romanian localization says windows-1252, even though the Windows legacy -# differs. -ro=windows-1250 - -tm=windows-1250 - diff --git a/dom/encoding/test/file_in_lk_TLD.html b/dom/encoding/test/file_in_lk_TLD.html new file mode 100644 index 000000000000..3f238998f8e8 --- /dev/null +++ b/dom/encoding/test/file_in_lk_TLD.html @@ -0,0 +1,9 @@ + +これは文字実験です。 + + +

これは文字実験です。

\ No newline at end of file diff --git a/dom/encoding/test/mochitest.ini b/dom/encoding/test/mochitest.ini index 28a746b3a211..e0c5564891c8 100644 --- a/dom/encoding/test/mochitest.ini +++ b/dom/encoding/test/mochitest.ini @@ -1,6 +1,7 @@ [DEFAULT] support-files = file_TLD.html + file_in_lk_TLD.html file_utf16_be_bom.css file_utf16_be_bom.js file_utf16_be_bom.xhtml @@ -24,4 +25,5 @@ support-files = [test_TextDecoder.html] [test_TextEncoder.html] [test_TLD.html] +[test_in_lk_TLD.html] [test_utf16_files.html] diff --git a/dom/encoding/test/test_in_lk_TLD.html b/dom/encoding/test/test_in_lk_TLD.html new file mode 100644 index 000000000000..8a908b933f55 --- /dev/null +++ b/dom/encoding/test/test_in_lk_TLD.html @@ -0,0 +1,55 @@ + + + + + + Test for Bug 1603712 + + + + + +Mozilla Bug 1603712 +

+ +
+
+ + diff --git a/dom/html/nsHTMLDocument.cpp b/dom/html/nsHTMLDocument.cpp index 56ac241e5549..5b2f652090a9 100644 --- a/dom/html/nsHTMLDocument.cpp +++ b/dom/html/nsHTMLDocument.cpp @@ -59,7 +59,6 @@ // AHMED 12-2 #include "nsBidiUtils.h" -#include "mozilla/dom/FallbackEncoding.h" #include "mozilla/Encoding.h" #include "mozilla/EventListenerManager.h" #include "mozilla/IdentifierMapEntry.h" @@ -317,73 +316,6 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell, } } -void nsHTMLDocument::TryTLD(int32_t& aCharsetSource, - NotNull& aEncoding) { - if (aCharsetSource >= kCharsetFromTopLevelDomain) { - return; - } - if (!StaticPrefs::intl_charset_fallback_tld()) { - return; - } - if (!mDocumentURI) { - return; - } - nsAutoCString host; - mDocumentURI->GetAsciiHost(host); - if (host.IsEmpty()) { - return; - } - // First let's see if the host is DNS-absolute and ends with a dot and - // get rid of that one. - if (host.Last() == '.') { - host.SetLength(host.Length() - 1); - if (host.IsEmpty()) { - return; - } - } - // If we still have a dot, the host is weird, so let's continue only - // if we have something other than a dot now. - if (host.Last() == '.') { - return; - } - int32_t index = host.RFindChar('.'); - if (index == kNotFound) { - // We have an intranet host, Gecko-internal URL or an IPv6 address. - return; - } - // Since the string didn't end with a dot and we found a dot, - // there is at least one character between the dot and the end of - // the string, so taking the substring below is safe. - nsAutoCString tld; - ToLowerCase(Substring(host, index + 1, host.Length() - (index + 1)), tld); - // Reject generic TLDs and country TLDs that need more research - if (!FallbackEncoding::IsParticipatingTopLevelDomain(tld)) { - return; - } - // Check if we have an IPv4 address - bool seenNonDigit = false; - for (size_t i = 0; i < tld.Length(); ++i) { - char c = tld.CharAt(i); - if (c < '0' || c > '9') { - seenNonDigit = true; - break; - } - } - if (!seenNonDigit) { - return; - } - aCharsetSource = kCharsetFromTopLevelDomain; - aEncoding = FallbackEncoding::FromTopLevelDomain(tld); -} - -void nsHTMLDocument::TryFallback(int32_t& aCharsetSource, - NotNull& aEncoding) { - if (kCharsetFromFallback <= aCharsetSource) return; - - aCharsetSource = kCharsetFromFallback; - aEncoding = FallbackEncoding::FromLocale(); -} - // Using a prototype document is only allowed with chrome privilege. bool ShouldUsePrototypeDocument(nsIChannel* aChannel, Document* aDoc) { if (!aChannel || !aDoc || @@ -555,6 +487,8 @@ nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand, NS_ASSERTION(docShell, "Unexpected null value"); charsetSource = kCharsetUninitialized; + // Used for .in and .lk TLDs. .jp is handled in the parser. + encoding = WINDOWS_1252_ENCODING; // The following will try to get the character encoding from various // sources. Each Try* function will return early if the source is already @@ -580,9 +514,6 @@ nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand, if (cachingChan && !urlSpec.IsEmpty()) { TryCacheCharset(cachingChan, charsetSource, encoding); } - - TryTLD(charsetSource, encoding); - TryFallback(charsetSource, encoding); } SetDocumentCharacterSetSource(charsetSource); diff --git a/extensions/universalchardet/tests/CharsetDetectionTests.js b/extensions/universalchardet/tests/CharsetDetectionTests.js index e7ebe1aa5d94..a92b808e4502 100644 --- a/extensions/universalchardet/tests/CharsetDetectionTests.js +++ b/extensions/universalchardet/tests/CharsetDetectionTests.js @@ -3,14 +3,10 @@ var { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm"); var gExpectedCharset; -var gOldPref; -var gDetectorList; -var gTestIndex; var gLocalDir; -function CharsetDetectionTests(aTestFile, aExpectedCharset, aDetectorList) { +function CharsetDetectionTests(aTestFile, aExpectedCharset) { gExpectedCharset = aExpectedCharset; - gDetectorList = aDetectorList; InitDetectorTests(); @@ -21,21 +17,10 @@ function CharsetDetectionTests(aTestFile, aExpectedCharset, aDetectorList) { } function InitDetectorTests() { - var prefService = Services.prefs; var loader = Services.scriptloader; var ioService = Services.io; loader.loadSubScript("chrome://mochikit/content/chrome-harness.js"); - try { - gOldPref = prefService.getComplexValue( - "intl.charset.detector", - Ci.nsIPrefLocalizedString - ).data; - } catch (e) { - gOldPref = ""; - } - SetDetectorPref(gDetectorList[0]); - gTestIndex = 0; $("testframe").onload = DoDetectionTest; if (gExpectedCharset == "default") { @@ -54,37 +39,11 @@ function InitDetectorTests() { gLocalDir = ioService.newFileURI(dir).spec; } -function SetDetectorPref(aPrefValue) { - var fallback = ""; - if (aPrefValue == "ja_parallel_state_machine") { - fallback = "Shift_JIS"; - } else if (aPrefValue == "ruprob" || aPrefValue == "ukprob") { - fallback = "windows-1251"; - } - var prefService = Services.prefs; - prefService.setStringPref("intl.charset.detector", aPrefValue); - prefService.setStringPref("intl.charset.fallback.override", fallback); -} - function DoDetectionTest() { var iframeDoc = $("testframe").contentDocument; var charset = iframeDoc.characterSet; - is( - charset, - gExpectedCharset, - "decoded as " + gExpectedCharset + " by " + gDetectorList[gTestIndex] - ); + is(charset, gExpectedCharset, "decoded as " + gExpectedCharset); - if (++gTestIndex < gDetectorList.length) { - SetDetectorPref(gDetectorList[gTestIndex]); - iframeDoc.location.reload(); - } else { - CleanUpDetectionTests(); - } -} - -function CleanUpDetectionTests() { - SetDetectorPref(gOldPref); SimpleTest.finish(); } diff --git a/layout/build/nsLayoutStatics.cpp b/layout/build/nsLayoutStatics.cpp index 3cb604d90102..16a48222a156 100644 --- a/layout/build/nsLayoutStatics.cpp +++ b/layout/build/nsLayoutStatics.cpp @@ -48,7 +48,6 @@ #include "nsHTMLDNSPrefetch.h" #include "nsHtml5Module.h" #include "nsHTMLTags.h" -#include "mozilla/dom/FallbackEncoding.h" #include "nsFocusManager.h" #include "nsListControlFrame.h" #include "mozilla/dom/HTMLInputElement.h" @@ -234,7 +233,6 @@ nsresult nsLayoutStatics::Initialize() { CubebUtils::InitLibrary(); nsHtml5Module::InitializeStatics(); - mozilla::dom::FallbackEncoding::Initialize(); nsLayoutUtils::Initialize(); PointerEventHandler::InitializeStatics(); TouchManager::InitializeStatics(); @@ -379,8 +377,6 @@ void nsLayoutStatics::Shutdown() { nsHtml5Module::ReleaseStatics(); - mozilla::dom::FallbackEncoding::Shutdown(); - mozilla::EventDispatcher::Shutdown(); HTMLInputElement::DestroyUploadLastDir(); diff --git a/modules/libpref/init/StaticPrefList.yaml b/modules/libpref/init/StaticPrefList.yaml index 8f214a7a4a47..4a130ee9e6ca 100644 --- a/modules/libpref/init/StaticPrefList.yaml +++ b/modules/libpref/init/StaticPrefList.yaml @@ -4752,19 +4752,6 @@ # Prefs starting with "intl." #--------------------------------------------------------------------------- -# Whether ISO-2022-JP is a permitted content-based encoding detection -# outcome in the JapaneseDetector. -- name: intl.charset.detector.iso2022jp.allowed - type: bool - value: true - mirror: always - -# Whether the new encoding detector is enabled (except for Japan's ccTLDs). -- name: intl.charset.detector.ng.enabled - type: bool - value: true - mirror: always - # Whether the new encoding detector is enabled for the .jp TLD. - name: intl.charset.detector.ng.jp.enabled type: bool @@ -4783,12 +4770,6 @@ value: false mirror: always -# Whether the TLD is considered if the new encoding detector is disabled. -- name: intl.charset.fallback.tld - type: bool - value: true - mirror: always - # If true, dispatch the keydown and keyup events on any web apps even during # composition. - name: intl.ime.hack.on_any_apps.fire_key_events_for_composition diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index 440b1fb15686..82f3b1340d79 100644 --- a/modules/libpref/init/all.js +++ b/modules/libpref/init/all.js @@ -1966,8 +1966,6 @@ pref("converter.html2txt.header_strategy", 1); // 0 = no indention; 1 = indenti pref("intl.accept_languages", "chrome://global/locale/intl.properties"); pref("intl.menuitems.alwaysappendaccesskeys","chrome://global/locale/intl.properties"); pref("intl.menuitems.insertseparatorbeforeaccesskeys","chrome://global/locale/intl.properties"); -pref("intl.charset.detector", "chrome://global/locale/intl.properties"); -pref("intl.charset.fallback.override", ""); pref("intl.ellipsis", "chrome://global-platform/locale/intl.properties"); // this pref allows user to request that all internationalization formatters // like date/time formatting, unit formatting, calendars etc. should use diff --git a/netwerk/streamconv/converters/nsDirIndexParser.cpp b/netwerk/streamconv/converters/nsDirIndexParser.cpp index 11db477519fe..ed53cc45fb44 100644 --- a/netwerk/streamconv/converters/nsDirIndexParser.cpp +++ b/netwerk/streamconv/converters/nsDirIndexParser.cpp @@ -8,7 +8,6 @@ #include "nsDirIndexParser.h" #include "mozilla/ArrayUtils.h" -#include "mozilla/dom/FallbackEncoding.h" #include "mozilla/Encoding.h" #include "prprf.h" #include "nsCRT.h" @@ -18,9 +17,69 @@ #include "nsIInputStream.h" #include "nsITextToSubURI.h" #include "nsServiceManagerUtils.h" +#include "mozilla/intl/LocaleService.h" using namespace mozilla; +struct EncodingProp { + const char* const mKey; + NotNull mValue; +}; + +static const EncodingProp localesFallbacks[] = { + {"ar", WINDOWS_1256_ENCODING}, {"ba", WINDOWS_1251_ENCODING}, + {"be", WINDOWS_1251_ENCODING}, {"bg", WINDOWS_1251_ENCODING}, + {"cs", WINDOWS_1250_ENCODING}, {"el", ISO_8859_7_ENCODING}, + {"et", WINDOWS_1257_ENCODING}, {"fa", WINDOWS_1256_ENCODING}, + {"he", WINDOWS_1255_ENCODING}, {"hr", WINDOWS_1250_ENCODING}, + {"hu", ISO_8859_2_ENCODING}, {"ja", SHIFT_JIS_ENCODING}, + {"kk", WINDOWS_1251_ENCODING}, {"ko", EUC_KR_ENCODING}, + {"ku", WINDOWS_1254_ENCODING}, {"ky", WINDOWS_1251_ENCODING}, + {"lt", WINDOWS_1257_ENCODING}, {"lv", WINDOWS_1257_ENCODING}, + {"mk", WINDOWS_1251_ENCODING}, {"pl", ISO_8859_2_ENCODING}, + {"ru", WINDOWS_1251_ENCODING}, {"sah", WINDOWS_1251_ENCODING}, + {"sk", WINDOWS_1250_ENCODING}, {"sl", ISO_8859_2_ENCODING}, + {"sr", WINDOWS_1251_ENCODING}, {"tg", WINDOWS_1251_ENCODING}, + {"th", WINDOWS_874_ENCODING}, {"tr", WINDOWS_1254_ENCODING}, + {"tt", WINDOWS_1251_ENCODING}, {"uk", WINDOWS_1251_ENCODING}, + {"vi", WINDOWS_1258_ENCODING}, {"zh", GBK_ENCODING}}; + +static NotNull +GetFTPFallbackEncodingDoNotAddNewCallersToThisFunction() { + nsAutoCString locale; + mozilla::intl::LocaleService::GetInstance()->GetAppLocaleAsBCP47(locale); + + // Let's lower case the string just in case unofficial language packs + // don't stick to conventions. + ToLowerCase(locale); // ASCII lowercasing with CString input! + + // Special case Traditional Chinese before throwing away stuff after the + // language itself. Today we only ship zh-TW, but be defensive about + // possible future values. + if (locale.EqualsLiteral("zh-tw") || locale.EqualsLiteral("zh-hk") || + locale.EqualsLiteral("zh-mo") || locale.EqualsLiteral("zh-hant")) { + return BIG5_ENCODING; + } + + // Throw away regions and other variants to accommodate weird stuff seen + // in telemetry--apparently unofficial language packs. + int32_t hyphenIndex = locale.FindChar('-'); + if (hyphenIndex >= 0) { + locale.Truncate(hyphenIndex); + } + + size_t index; + if (BinarySearchIf( + localesFallbacks, 0, ArrayLength(localesFallbacks), + [&locale](const EncodingProp& aProperty) { + return locale.Compare(aProperty.mKey); + }, + &index)) { + return localesFallbacks[index].mValue; + } + return WINDOWS_1252_ENCODING; +} + NS_IMPL_ISUPPORTS(nsDirIndexParser, nsIRequestObserver, nsIStreamListener, nsIDirIndexParser) @@ -30,7 +89,7 @@ nsresult nsDirIndexParser::Init() { mLineStart = 0; mHasDescription = false; mFormat[0] = -1; - auto encoding = mozilla::dom::FallbackEncoding::FromLocale(); + auto encoding = GetFTPFallbackEncodingDoNotAddNewCallersToThisFunction(); encoding->Name(mEncoding); nsresult rv; diff --git a/parser/html/nsHtml5StreamParser.cpp b/parser/html/nsHtml5StreamParser.cpp index 0ebc4009b752..71a034f1cb5e 100644 --- a/parser/html/nsHtml5StreamParser.cpp +++ b/parser/html/nsHtml5StreamParser.cpp @@ -177,8 +177,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, mEventTarget(nsHtml5Module::GetStreamParserThread()->SerialEventTarget()), mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)), mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)), - mJapaneseDetector(mozilla::JapaneseDetector::Create( - StaticPrefs::intl_charset_detector_iso2022jp_allowed())), + mJapaneseDetector(mozilla::JapaneseDetector::Create(true)), mUseJapaneseDetector(false), mInitialEncodingWasFromParentFrame(false), mHasHadErrors(false), @@ -451,6 +450,7 @@ void nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding) { if (encoding) { mEncoding = WrapNotNull(encoding); mCharsetSource = kCharsetFromMetaTag; // closest for XML + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); return; } // else the page declared an encoding Gecko doesn't support and we'd @@ -460,6 +460,7 @@ void nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding) { } mEncoding = UTF_8_ENCODING; // XML defaults to UTF-8 without a BOM mCharsetSource = kCharsetFromMetaTag; // means confident + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } // A separate user data struct is used instead of passing the @@ -594,6 +595,7 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span aFromSegment, // longer than 1024 bytes, but that case is not worth worrying about. mEncoding = UTF_8_ENCODING; mCharsetSource = kCharsetFromMetaTag; // means confident + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment); @@ -1012,6 +1014,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) { if (originalURI->SchemeIs("resource")) { mCharsetSource = kCharsetFromBuiltIn; mEncoding = UTF_8_ENCODING; + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } else { nsCOMPtr currentURI; rv = channel->GetURI(getter_AddRefs(currentURI)); @@ -1150,39 +1153,26 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) { // Compute various pref-based special cases if (!mDecodingLocalFileWithoutTokenizing && mFeedChardet) { - if (StaticPrefs::intl_charset_detector_ng_enabled()) { - if (mTLD.EqualsLiteral("jp")) { - mUseJapaneseDetector = - !StaticPrefs::intl_charset_detector_ng_jp_enabled(); - } else if (mTLD.EqualsLiteral("in") && - mEncoding == WINDOWS_1252_ENCODING && - !StaticPrefs::intl_charset_detector_ng_in_enabled()) { - // Avoid breaking font hacks that Chrome doesn't break. - DontGuessEncoding(); - } else if (mTLD.EqualsLiteral("lk") && - mEncoding == WINDOWS_1252_ENCODING && - !StaticPrefs::intl_charset_detector_ng_lk_enabled()) { - // Avoid breaking font hacks that Chrome doesn't break. - DontGuessEncoding(); + if (mTLD.EqualsLiteral("jp")) { + mUseJapaneseDetector = + !StaticPrefs::intl_charset_detector_ng_jp_enabled(); + if (mUseJapaneseDetector && mEncoding == WINDOWS_1252_ENCODING && + mCharsetSource <= kCharsetFromTopLevelDomain) { + mCharsetSource = kCharsetFromTopLevelDomain; + mEncoding = SHIFT_JIS_ENCODING; + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } - } else { - // If the new detector is turned off in general, we still use it to - // emulate the old Cyrillic detector in cases where the old Cyrillic - // detector would have been enabled. - nsAutoCString detectorName; - Preferences::GetLocalizedCString("intl.charset.detector", detectorName); - bool forceEncodingDetectorToCyrillicOnly = - detectorName.EqualsLiteral("ruprob") || - detectorName.EqualsLiteral("ukprob"); - if (mEncoding->IsJapaneseLegacy()) { - mUseJapaneseDetector = true; - } else if (mEncoding == WINDOWS_1251_ENCODING && - forceEncodingDetectorToCyrillicOnly) { - mTLD.AssignLiteral("ru"); // Force the detector into Cyrillic mode - // regardless of real TLD - } else { - DontGuessEncoding(); + } else if ((mTLD.EqualsLiteral("in") && + !StaticPrefs::intl_charset_detector_ng_in_enabled()) || + (mTLD.EqualsLiteral("lk") && + !StaticPrefs::intl_charset_detector_ng_lk_enabled())) { + if (mEncoding == WINDOWS_1252_ENCODING && + mCharsetSource <= kCharsetFromTopLevelDomain) { + // Avoid breaking font hacks that Chrome doesn't break. + mCharsetSource = kCharsetFromTopLevelDomain; + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } + DontGuessEncoding(); } } @@ -1566,7 +1556,8 @@ const Encoding* nsHtml5StreamParser::PreferredForInternalEncodingDecl( } } mCharsetSource = kCharsetFromMetaTag; // become confident - DontGuessEncoding(); // don't feed chardet when confident + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); + DontGuessEncoding(); // don't feed chardet when confident return nullptr; } diff --git a/toolkit/locales/en-US/chrome/global/charsetMenu.properties b/toolkit/locales/en-US/chrome/global/charsetMenu.properties index 3dfba32569c0..09aacfd120a1 100644 --- a/toolkit/locales/en-US/chrome/global/charsetMenu.properties +++ b/toolkit/locales/en-US/chrome/global/charsetMenu.properties @@ -24,18 +24,6 @@ # (When this code was developed, all localizations appeared to use # U+0028 LEFT PARENTHESIS for this purpose.) -# Auto-Detect (sub)menu -charsetMenuCharsets = Character Encoding -charsetMenuAutodet = Auto-Detect -# 'A' is reserved for Arabic: -charsetMenuAutodet.key = D -charsetMenuAutodet.off = (off) -charsetMenuAutodet.off.key = o -charsetMenuAutodet.ru = Russian -charsetMenuAutodet.ru.key = R -charsetMenuAutodet.uk = Ukrainian -charsetMenuAutodet.uk.key = U - # Globally-relevant UTF-8.key = U UTF-8 = Unicode diff --git a/toolkit/locales/en-US/chrome/global/intl.properties b/toolkit/locales/en-US/chrome/global/intl.properties index df8022998e80..4f2236e87276 100644 --- a/toolkit/locales/en-US/chrome/global/intl.properties +++ b/toolkit/locales/en-US/chrome/global/intl.properties @@ -30,14 +30,6 @@ intl.accept_languages=en-US, en # http://dxr.mozilla.org/mozilla-central/source/browser/components/preferences/dialogs/fonts.xhtml font.language.group=x-western -# LOCALIZATION NOTE (intl.charset.detector): -# This preference controls the initial setting for the character encoding -# detector. Valid values are ja_parallel_state_machine for Japanese, ruprob -# for Russian and ukprob for Ukrainian and the empty string to turn detection -# off. The value must be empty for locales other than Japanese, Russian and -# Ukrainian. -intl.charset.detector= - # LOCALIZATION NOTE (pluralRule): Pick the appropriate plural rule for your # language. This will determine how many plural forms of a word you will need # to provide and in what order. diff --git a/toolkit/modules/CharsetMenu.jsm b/toolkit/modules/CharsetMenu.jsm index 6d6552b15cdc..4bba8c095c28 100644 --- a/toolkit/modules/CharsetMenu.jsm +++ b/toolkit/modules/CharsetMenu.jsm @@ -19,12 +19,6 @@ ChromeUtils.defineModuleGetter( "resource://gre/modules/Deprecated.jsm" ); -const kAutoDetectors = [ - ["off", ""], - ["ru", "ruprob"], - ["uk", "ukprob"], -]; - /** * This set contains encodings that are in the Encoding Standard, except: * - Japanese encodings are represented by one autodetection item @@ -97,29 +91,10 @@ function CharsetComparator(a, b) { return titleA.localeCompare(titleB) || b.value.localeCompare(a.value); } -function SetDetector(event) { - Services.prefs.setStringPref( - "intl.charset.detector", - event.target.getAttribute("detector") - ); -} - -function UpdateDetectorMenu(event) { - event.stopPropagation(); - let detector = Services.prefs.getComplexValue( - "intl.charset.detector", - Ci.nsIPrefLocalizedString - ); - let menuitem = this.getElementsByAttribute("detector", detector).item(0); - if (menuitem) { - menuitem.setAttribute("checked", "true"); - } -} - -var gDetectorInfoCache, gCharsetInfoCache, gPinnedInfoCache; +var gCharsetInfoCache, gPinnedInfoCache; var CharsetMenu = { - build(parent, deprecatedShowAccessKeys = true, showDetector = true) { + build(parent, deprecatedShowAccessKeys = true) { if (!deprecatedShowAccessKeys) { Deprecated.warning( "CharsetMenu no longer supports building a menu with no access keys.", @@ -139,38 +114,12 @@ var CharsetMenu = { } if (parent.hasChildNodes()) { - // Detector menu or charset menu already built + // Charset menu already built return; } this._ensureDataReady(); let doc = parent.ownerDocument; - if ( - showDetector && - !Services.prefs.getBoolPref("intl.charset.detector.ng.enabled") - ) { - let menuNode = doc.createXULElement("menu"); - menuNode.setAttribute( - "label", - gBundle.GetStringFromName("charsetMenuAutodet") - ); - menuNode.setAttribute( - "accesskey", - gBundle.GetStringFromName("charsetMenuAutodet.key") - ); - parent.appendChild(menuNode); - - let menuPopupNode = doc.createXULElement("menupopup"); - menuNode.appendChild(menuPopupNode); - menuPopupNode.addEventListener("command", SetDetector); - menuPopupNode.addEventListener("popupshown", UpdateDetectorMenu); - - gDetectorInfoCache.forEach(detectorInfo => - menuPopupNode.appendChild(createDOMNode(doc, detectorInfo)) - ); - parent.appendChild(doc.createXULElement("menuseparator")); - } - gPinnedInfoCache.forEach(charsetInfo => parent.appendChild(createDOMNode(doc, charsetInfo)) ); @@ -183,29 +132,18 @@ var CharsetMenu = { getData() { this._ensureDataReady(); return { - detectors: gDetectorInfoCache, pinnedCharsets: gPinnedInfoCache, otherCharsets: gCharsetInfoCache, }; }, _ensureDataReady() { - if (!gDetectorInfoCache) { - gDetectorInfoCache = this.getDetectorInfo(); + if (!gCharsetInfoCache) { gPinnedInfoCache = this.getCharsetInfo(kPinned, false); gCharsetInfoCache = this.getCharsetInfo(kEncodings); } }, - getDetectorInfo() { - return kAutoDetectors.map(([detectorName, nodeId]) => ({ - label: this._getDetectorLabel(detectorName), - accesskey: this._getDetectorAccesskey(detectorName), - name: "detector", - value: nodeId, - })); - }, - getCharsetInfo(charsets, sort = true) { let list = Array.from(charsets, charset => ({ label: this._getCharsetLabel(charset), @@ -220,21 +158,6 @@ var CharsetMenu = { return list; }, - _getDetectorLabel(detector) { - try { - return gBundle.GetStringFromName("charsetMenuAutodet." + detector); - } catch (ex) {} - return detector; - }, - _getDetectorAccesskey(detector) { - try { - return gBundle.GetStringFromName( - "charsetMenuAutodet." + detector + ".key" - ); - } catch (ex) {} - return ""; - }, - _getCharsetLabel(charset) { if (charset == "GBK") { // Localization key has been revised diff --git a/tools/lint/file-whitespace.yml b/tools/lint/file-whitespace.yml index edd747e70be7..3d2b3584feae 100644 --- a/tools/lint/file-whitespace.yml +++ b/tools/lint/file-whitespace.yml @@ -23,7 +23,6 @@ file-whitespace: - dom/bindings/parser/tests/test_special_methods.py - dom/bindings/parser/tests/test_toJSON.py - dom/bindings/parser/tests/test_typedef.py - - dom/encoding/encodings2arrays.py - dom/media/gtest/AudioGenerator.cpp - dom/media/gtest/AudioGenerator.h - dom/security/test/csp/file_websocket_self_wsh.py