Backed out changeset 4891a17c55e2 (bug 1713627) for Browser-chrome failures in docshell/test/browser/browser_bug673087-1.js. CLOSED TREE

This commit is contained in:
Dorel Luca 2021-06-21 12:10:54 +03:00
Родитель 7292512649
Коммит 2118316ba4
82 изменённых файлов: 2538 добавлений и 139 удалений

21
Cargo.lock сгенерированный
Просмотреть файл

@ -2025,6 +2025,7 @@ dependencies = [
"rusqlite",
"rust_minidump_writer_linux",
"rustc_version",
"shift_or_euc_c",
"static_prefs",
"storage",
"unic-langid",
@ -4631,6 +4632,26 @@ dependencies = [
"opaque-debug",
]
[[package]]
name = "shift_or_euc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"
dependencies = [
"encoding_rs",
"memchr",
]
[[package]]
name = "shift_or_euc_c"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"
dependencies = [
"encoding_rs",
"shift_or_euc",
]
[[package]]
name = "shlex"
version = "0.1.1"

Просмотреть файл

@ -98,8 +98,9 @@ class BrowserTabChild extends JSWindowActorChild {
} catch (e) {}
break;
case "ForceEncodingDetection":
docShell.forceEncodingDetection();
case "UpdateCharacterSet":
docShell.charset = message.data.value;
docShell.gatherCharsetMenuTelemetry();
break;
}
}

Просмотреть файл

@ -547,6 +547,16 @@
</vbox>
</panelview>
<panelview id="PanelUI-characterEncodingView" flex="1">
<vbox class="panel-subview-body">
<vbox id="PanelUI-characterEncodingView-pinned"
class="PanelUI-characterEncodingView-list"/>
<toolbarseparator/>
<vbox id="PanelUI-characterEncodingView-charsets"
class="PanelUI-characterEncodingView-list"/>
</vbox>
</panelview>
<panelview id="PanelUI-panicView" flex="1"
descriptionheightworkaround="true">
<vbox class="panel-subview-body">
@ -585,6 +595,11 @@
class="subviewbutton subviewbutton-iconic"
data-l10n-id="appmenu-taskmanager"
oncommand="switchToTabHavingURI('about:performance', true)"/>
<toolbarbutton id="appMenu-characterencoding-button"
class="subviewbutton subviewbutton-nav"
label="&charsetMenu2.label;"
closemenu="none"
oncommand="PanelUI.showSubView('PanelUI-characterEncodingView', this)"/>
<toolbarbutton id="appMenu-workoffline-button"
class="subviewbutton"
data-l10n-id="more-menu-go-offline"

Просмотреть файл

@ -2,6 +2,8 @@
%brandDTD;
<!ENTITY % browserDTD SYSTEM "chrome://browser/locale/browser.dtd" >
%browserDTD;
<!ENTITY % charsetDTD SYSTEM "chrome://global/locale/charsetMenu.dtd" >
%charsetDTD;
<!ENTITY % textcontextDTD SYSTEM "chrome://global/locale/textcontext.dtd" >
%textcontextDTD;
<!ENTITY % placesDTD SYSTEM "chrome://browser/locale/places/places.dtd">

Просмотреть файл

@ -143,7 +143,8 @@
</menu>
<menu id="view-menu" data-l10n-id="menu-view">
<menupopup id="menu_viewPopup">
<menupopup id="menu_viewPopup"
onpopupshowing="updateCharacterEncodingMenuState();">
<menu id="viewToolbarsMenu" data-l10n-id="menu-view-toolbars-menu">
<menupopup id="view-menu-popup" onpopupshowing="onViewToolbarsPopupShowing(event);">
<menuseparator/>
@ -202,7 +203,7 @@
</menu>
<menuitem id="repair-text-encoding"
disabled="true"
oncommand="BrowserForceEncodingDetection();"
oncommand="BrowserSetForcedCharacterSet('_autodetect_all')"
data-l10n-id="menu-view-repair-text-encoding"/>
<menuseparator/>
#ifdef XP_MACOSX

Просмотреть файл

@ -27,6 +27,7 @@ XPCOMUtils.defineLazyModuleGetters(this, {
BrowserUtils: "resource://gre/modules/BrowserUtils.jsm",
BrowserWindowTracker: "resource:///modules/BrowserWindowTracker.jsm",
CFRPageActions: "resource://activity-stream/lib/CFRPageActions.jsm",
CharsetMenu: "resource://gre/modules/CharsetMenu.jsm",
Color: "resource://gre/modules/Color.jsm",
ContextualIdentityService:
"resource://gre/modules/ContextualIdentityService.jsm",
@ -4883,6 +4884,24 @@ function updateUserContextUIIndicator() {
hbox.hidden = false;
}
/**
 * Syncs the "disabled" attribute of the #charsetMenu element with whether the
 * selected browser reports that the character encoding menu may be enabled.
 * Intended to run whenever the View menu or the app menu is about to open.
 */
function updateCharacterEncodingMenuState() {
  const menu = document.getElementById("charsetMenu");
  // gBrowser is null on Mac when the menubar is shown for a non-browser
  // window, so it has to be checked before touching selectedBrowser.
  const mayEnable =
    gBrowser && gBrowser.selectedBrowser.mayEnableCharacterEncodingMenu;

  // Depending on which parts of the menubar exist (e.g. no app menu on Mac),
  // the menu element itself may be missing; nothing to update in that case.
  if (!menu) {
    return;
  }

  if (mayEnable) {
    menu.removeAttribute("disabled");
  } else {
    menu.setAttribute("disabled", "true");
  }
}
var XULBrowserWindow = {
// Stored Status, Link and Loading values
status: "",
@ -7059,11 +7078,37 @@ function handleDroppedLink(
}
}
function BrowserForceEncodingDetection() {
gBrowser.selectedBrowser.forceEncodingDetection();
/**
 * Forces a character set on the selected browser and reloads the page.
 *
 * When aCharset is truthy it is assigned to the selected browser (the
 * pseudo-label "Japanese" is mapped to "Shift_JIS" first) and stored for the
 * current URI through PlacesUIUtils; failures of that save are reported via
 * Cu.reportError. The charset reload is triggered in every case, including
 * when aCharset is empty.
 *
 * @param {string} aCharset - Encoding label to force, or a falsy value to
 *   only reload.
 */
function BrowserSetForcedCharacterSet(aCharset) {
  if (aCharset) {
    const forcedCharset = aCharset == "Japanese" ? "Shift_JIS" : aCharset;
    gBrowser.selectedBrowser.characterSet = forcedCharset;

    // Remember the override for this page.
    const saved = PlacesUIUtils.setCharsetForPage(
      gBrowser.currentURI,
      forcedCharset,
      window
    );
    saved.catch(Cu.reportError);
  }

  BrowserCharsetReload();
}
/**
 * Reloads the current page, passing the nsIWebNavigation
 * LOAD_FLAGS_CHARSET_CHANGE flag to BrowserReloadWithFlags.
 */
function BrowserCharsetReload() {
  const reload = BrowserReloadWithFlags;
  const charsetChangeFlag = Ci.nsIWebNavigation.LOAD_FLAGS_CHARSET_CHANGE;
  reload(charsetChangeFlag);
}
/**
 * Updates the "checked" attribute of every <menuitem> below `target` so that
 * it reflects whether the item's "charset" attribute equals the selected
 * browser's charset, as folded by CharsetMenu.foldCharset.
 *
 * @param {Element} target - Container whose menuitem descendants are updated.
 */
function UpdateCurrentCharset(target) {
  const browser = gBrowser.selectedBrowser;
  const currentCharset = CharsetMenu.foldCharset(
    browser.characterSet,
    browser.charsetAutodetected
  );

  Array.from(target.getElementsByTagName("menuitem")).forEach(item => {
    item.setAttribute(
      "checked",
      item.getAttribute("charset") === currentCharset
    );
  });
}
var ToolbarContextMenu = {
updateDownloadsAutoHide(popup) {
let checkbox = document.getElementById(

Просмотреть файл

@ -439,7 +439,7 @@ const CustomizableWidgets = [
id: "characterencoding-button",
l10nId: "repair-text-encoding-button",
onCommand(aEvent) {
aEvent.view.BrowserForceEncodingDetection();
aEvent.view.BrowserSetForcedCharacterSet("_autodetect_all");
},
},
{

Просмотреть файл

@ -370,6 +370,7 @@ nsDocShell::nsDocShell(BrowsingContext* aBrowsingContext,
: nsDocLoader(true),
mContentWindowID(aContentWindowID),
mBrowsingContext(aBrowsingContext),
mForcedCharset(nullptr),
mParentCharset(nullptr),
mTreeOwner(nullptr),
mScrollbarPref(ScrollbarPreference::Auto),
@ -1526,7 +1527,7 @@ nsDocShell::GetCharset(nsACString& aCharset) {
}
NS_IMETHODIMP
nsDocShell::ForceEncodingDetection() {
nsDocShell::GatherCharsetMenuTelemetry() {
nsCOMPtr<nsIContentViewer> viewer;
GetContentViewer(getter_AddRefs(viewer));
if (!viewer) {
@ -1538,11 +1539,15 @@ nsDocShell::ForceEncodingDetection() {
return NS_OK;
}
mForcedAutodetection = true;
LOGCHARSETMENU(("ENCODING_OVERRIDE_USED_AUTOMATIC"));
Telemetry::ScalarSet(Telemetry::ScalarID::ENCODING_OVERRIDE_USED_AUTOMATIC,
true);
if (mForcedAutodetection) {
LOGCHARSETMENU(("ENCODING_OVERRIDE_USED_AUTOMATIC"));
Telemetry::ScalarSet(Telemetry::ScalarID::ENCODING_OVERRIDE_USED_AUTOMATIC,
true);
} else {
LOGCHARSETMENU(("ENCODING_OVERRIDE_USED_MANUAL"));
Telemetry::ScalarSet(Telemetry::ScalarID::ENCODING_OVERRIDE_USED_MANUAL,
true);
}
nsIURI* url = doc->GetOriginalURI();
bool isFileURL = url && SchemeIsFile(url);
@ -1556,6 +1561,28 @@ nsDocShell::ForceEncodingDetection() {
Telemetry::AccumulateCategorical(
Telemetry::LABELS_ENCODING_OVERRIDE_SITUATION_2::AutoOverridden);
break;
case kCharsetFromUserForced:
case kCharsetFromUserForcedJapaneseAutoDetection:
LOGCHARSETMENU(("ManuallyOverridden"));
Telemetry::AccumulateCategorical(
Telemetry::LABELS_ENCODING_OVERRIDE_SITUATION_2::ManuallyOverridden);
break;
case kCharsetFromTopLevelDomain:
if (encoding == WINDOWS_1252_ENCODING) {
LOGCHARSETMENU(("UnlabeledInLk"));
Telemetry::AccumulateCategorical(
Telemetry::LABELS_ENCODING_OVERRIDE_SITUATION_2::UnlabeledInLk);
} else {
LOGCHARSETMENU(("UnlabeledJp"));
Telemetry::AccumulateCategorical(
Telemetry::LABELS_ENCODING_OVERRIDE_SITUATION_2::UnlabeledJp);
}
break;
case kCharsetFromFinalJapaneseAutoDetection:
LOGCHARSETMENU(("UnlabeledJp"));
Telemetry::AccumulateCategorical(
Telemetry::LABELS_ENCODING_OVERRIDE_SITUATION_2::UnlabeledJp);
break;
case kCharsetFromInitialAutoDetectionASCII:
// Deliberately no final version
LOGCHARSETMENU(("UnlabeledAscii"));
@ -1630,6 +1657,31 @@ nsDocShell::ForceEncodingDetection() {
return NS_OK;
}
// Setter for the forced charset of this docshell.
//
// Every call first clears the forced-autodetection flag. Then:
//   - an empty string clears the forced charset;
//   - the pseudo-label "_autodetect_all" stores windows-1252 as the forced
//     charset and sets the forced-autodetection flag;
//   - any other value is resolved with Encoding::ForLabel and stored only if
//     it is a known label that is ASCII-compatible or ISO-2022-JP.
// Returns NS_ERROR_INVALID_ARG for rejected labels, NS_OK otherwise.
NS_IMETHODIMP
nsDocShell::SetCharset(const nsACString& aCharset) {
  mForcedAutodetection = false;

  if (aCharset.IsEmpty()) {
    mForcedCharset = nullptr;
  } else if (aCharset.EqualsLiteral("_autodetect_all")) {
    mForcedCharset = WINDOWS_1252_ENCODING;
    mForcedAutodetection = true;
  } else {
    const Encoding* requested = Encoding::ForLabel(aCharset);
    // Unknown labels are rejected, and so are encodings that are not
    // ASCII-compatible (XSS hazards), with ISO-2022-JP as the one exception.
    const bool acceptable =
        requested && (requested->IsAsciiCompatible() ||
                      requested == ISO_2022_JP_ENCODING);
    if (!acceptable) {
      return NS_ERROR_INVALID_ARG;
    }
    mForcedCharset = requested;
  }

  return NS_OK;
}
void nsDocShell::SetParentCharset(const Encoding*& aCharset,
int32_t aCharsetSource,
nsIPrincipal* aPrincipal) {
@ -1990,6 +2042,30 @@ nsDocShell::GetMayEnableCharacterEncodingMenu(
return NS_OK;
}
// Reports through aCharsetAutodetected whether the charset source of the
// document in the current content viewer is one of the autodetection
// sources. The out value is false when there is no content viewer or no
// document. Always returns NS_OK.
NS_IMETHODIMP
nsDocShell::GetCharsetAutodetected(bool* aCharsetAutodetected) {
  *aCharsetAutodetected = false;

  if (!mContentViewer) {
    return NS_OK;
  }
  Document* doc = mContentViewer->GetDocument();
  if (!doc) {
    return NS_OK;
  }

  const int32_t source = doc->GetDocumentCharacterSetSource();

  // Sources in the contiguous range of non-forced autodetection results.
  const bool detectedWithoutOverride =
      source >= kCharsetFromInitialAutoDetectionASCII &&
      source <= kCharsetFromFinalAutoDetectionFile;

  // Sources produced by a user-forced autodetection.
  const bool detectedAfterOverride =
      source == kCharsetFromUserForcedJapaneseAutoDetection ||
      source == kCharsetFromPendingUserForcedAutoDetection ||
      source == kCharsetFromInitialUserForcedAutoDetection ||
      source == kCharsetFromFinalUserForcedAutoDetection;

  if (detectedWithoutOverride || detectedAfterOverride) {
    *aCharsetAutodetected = true;
  }
  return NS_OK;
}
NS_IMETHODIMP
nsDocShell::GetAllDocShellsInSubtree(int32_t aItemType,
DocShellEnumeratorDirection aDirection,

Просмотреть файл

@ -327,9 +327,9 @@ class nsDocShell final : public nsDocLoader,
void SetInFrameSwap(bool aInSwap) { mInFrameSwap = aInSwap; }
bool InFrameSwap();
bool GetForcedAutodetection() { return mForcedAutodetection; }
const mozilla::Encoding* GetForcedCharset() { return mForcedCharset; }
void ResetForcedAutodetection() { mForcedAutodetection = false; }
bool GetForcedAutodetection() { return mForcedAutodetection; }
mozilla::HTMLEditor* GetHTMLEditorInternal();
nsresult SetHTMLEditorInternal(mozilla::HTMLEditor* aHTMLEditor);
@ -1189,6 +1189,7 @@ class nsDocShell final : public nsDocLoader,
mozilla::UniquePtr<mozilla::gfx::Matrix5x4> mColorMatrix;
const mozilla::Encoding* mForcedCharset;
const mozilla::Encoding* mParentCharset;
// WEAK REFERENCES BELOW HERE.

Просмотреть файл

@ -494,10 +494,17 @@ interface nsIDocShell : nsIDocShellTreeItem
/**
* Upon getting, returns the canonical encoding label of the document
* currently loaded into this docshell.
*
* Upon setting, sets the forced encoding for compatibility with legacy callers.
*/
readonly attribute ACString charset;
attribute ACString charset;
void forceEncodingDetection();
/**
* Called when the user chose an encoding override from the character
* encoding menu. Separate from the setter for the charset property to avoid
* extensions adding noise to the data.
*/
void gatherCharsetMenuTelemetry();
/**
* In a child docshell, this is the charset of the parent docshell
@ -613,6 +620,11 @@ interface nsIDocShell : nsIDocShellTreeItem
*/
[infallible] readonly attribute boolean mayEnableCharacterEncodingMenu;
/**
* Indicates that the character encoding was autodetected.
*/
[infallible] readonly attribute boolean charsetAutodetected;
attribute nsIEditor editor;
readonly attribute boolean editable; /* this docShell is editable */
readonly attribute boolean hasEditingSession; /* this docShell has an editing session */

Просмотреть файл

@ -21,6 +21,9 @@ support-files =
file_bug234628-6-child.html
file_bug234628-6-child.html^headers^
file_bug234628-6.html
file_bug234628-7-child.html
file_bug234628-7-child.html^headers^
file_bug234628-7.html
file_bug234628-8-child.html
file_bug234628-8.html
file_bug234628-9-child.html
@ -40,8 +43,14 @@ support-files =
file_bug1328501.html
file_bug1328501_frame.html
file_bug1328501_framescript.js
file_bug1543077-1-child.html
file_bug1543077-1.html
file_bug1543077-2-child.html
file_bug1543077-2.html
file_bug1543077-3-child.html
file_bug1543077-3.html
file_bug1543077-4-child.html
file_bug1543077-4.html
file_multiple_pushState.html
file_onbeforeunload_0.html
file_onbeforeunload_1.html
@ -76,7 +85,10 @@ skip-if =
os == "linux" && bits == 64 && !debug # Bug 1607713
fission && os == "mac" && debug # Bug 1713903 - new Fission platform triage
[browser_backforward_userinteraction_about.js]
[browser_bug1543077-1.js]
[browser_bug1543077-2.js]
[browser_bug1543077-3.js]
[browser_bug1543077-4.js]
[browser_bug1594938.js]
[browser_bug1206879.js]
[browser_bug1309900_crossProcessHistoryNavigation.js]
@ -106,6 +118,7 @@ skip-if = !fission || !crashreporter # On a crash we only keep history when fiss
[browser_bug234628-4.js]
[browser_bug234628-5.js]
[browser_bug234628-6.js]
[browser_bug234628-7.js]
[browser_bug234628-8.js]
[browser_bug234628-9.js]
[browser_bug349769.js]

Просмотреть файл

@ -29,7 +29,7 @@ function afterOpen() {
content.document.getElementById("testinput").value = TEXT.enteredText2;
}).then(() => {
/* Force the page encoding to Shift_JIS */
BrowserForceEncodingDetection();
BrowserSetForcedCharacterSet("Shift_JIS");
});
}

Просмотреть файл

@ -0,0 +1,47 @@
/**
 * Entry point: hands the fixture page, the two check callbacks and the
 * "Japanese" charset label to runCharsetTest.
 */
function test() {
  const baseUrl = "http://mochi.test:8888/browser/docshell/test/browser/";
  const fixture = "file_bug1543077-1.html";
  const charsetLabel = "Japanese";
  runCharsetTest(
    baseUrl + fixture,
    afterOpen,
    charsetLabel,
    afterChangeCharset
  );
}
/**
 * First check passed to runCharsetTest: asserts the offsets at which U+0434
 * occurs in the text of the parent document and of its first frame, which
 * the assertion messages describe as the initial IBM866 decoding.
 */
function afterOpen() {
  const textOf = win => win.document.documentElement.textContent;

  is(
    textOf(content).indexOf("\u0434"),
    131,
    "Parent doc should be IBM866 initially"
  );
  is(
    textOf(content.frames[0]).indexOf("\u0434"),
    87,
    "Child doc should be IBM866 initially"
  );
}
/**
 * Second check passed to runCharsetTest: asserts where U+3042 occurs in the
 * text of the parent document and of its first frame, and that both
 * documents report "EUC-JP" as their characterSet.
 */
function afterChangeCharset() {
  const textOf = win => win.document.documentElement.textContent;
  const charsetOf = win => win.document.characterSet;

  is(
    textOf(content).indexOf("\u3042"),
    131,
    "Parent doc should decode as EUC-JP subsequently"
  );
  is(
    textOf(content.frames[0]).indexOf("\u3042"),
    87,
    "Child doc should decode as EUC-JP subsequently"
  );

  is(
    charsetOf(content),
    "EUC-JP",
    "Parent doc should report EUC-JP subsequently"
  );
  is(
    charsetOf(content.frames[0]),
    "EUC-JP",
    "Child doc should report EUC-JP subsequently"
  );
}

Просмотреть файл

@ -0,0 +1,47 @@
/**
 * Entry point: hands the fixture page, the two check callbacks and the
 * "Japanese" charset label to runCharsetTest.
 */
function test() {
  const baseUrl = "http://mochi.test:8888/browser/docshell/test/browser/";
  const fixture = "file_bug1543077-2.html";
  const charsetLabel = "Japanese";
  runCharsetTest(
    baseUrl + fixture,
    afterOpen,
    charsetLabel,
    afterChangeCharset
  );
}
/**
 * First check passed to runCharsetTest: asserts the offsets at which U+0412
 * occurs in the text of the parent document and of its first frame, which
 * the assertion messages describe as the initial IBM866 decoding.
 */
function afterOpen() {
  const textOf = win => win.document.documentElement.textContent;

  is(
    textOf(content).indexOf("\u0412"),
    134,
    "Parent doc should be IBM866 initially"
  );
  is(
    textOf(content.frames[0]).indexOf("\u0412"),
    90,
    "Child doc should be IBM866 initially"
  );
}
/**
 * Second check passed to runCharsetTest: asserts where U+3042 occurs in the
 * text of the parent document and of its first frame, and that both
 * documents report "Shift_JIS" as their characterSet.
 */
function afterChangeCharset() {
  const textOf = win => win.document.documentElement.textContent;
  const charsetOf = win => win.document.characterSet;

  is(
    textOf(content).indexOf("\u3042"),
    134,
    "Parent doc should decode as Shift_JIS subsequently"
  );
  is(
    textOf(content.frames[0]).indexOf("\u3042"),
    90,
    "Child doc should decode as Shift_JIS subsequently"
  );

  is(
    charsetOf(content),
    "Shift_JIS",
    "Parent doc should report Shift_JIS subsequently"
  );
  is(
    charsetOf(content.frames[0]),
    "Shift_JIS",
    "Child doc should report Shift_JIS subsequently"
  );
}

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug1543077-3.html",
afterOpen,
"Japanese",
afterChangeCharset
);
}

Просмотреть файл

@ -0,0 +1,47 @@
/**
 * Entry point: hands the fixture page, the two check callbacks and the
 * "Japanese" charset label to runCharsetTest.
 */
function test() {
  const baseUrl = "http://mochi.test:8888/browser/docshell/test/browser/";
  const fixture = "file_bug1543077-4.html";
  const charsetLabel = "Japanese";
  runCharsetTest(
    baseUrl + fixture,
    afterOpen,
    charsetLabel,
    afterChangeCharset
  );
}
/**
 * First check passed to runCharsetTest: asserts the offset of U+0434 in the
 * parent document's text and of U+0412 in the first frame's text, which the
 * assertion messages describe as the initial IBM866 decoding.
 */
function afterOpen() {
  const textOf = win => win.document.documentElement.textContent;

  is(
    textOf(content).indexOf("\u0434"),
    131,
    "Parent doc should be IBM866 initially"
  );
  is(
    textOf(content.frames[0]).indexOf("\u0412"),
    90,
    "Child doc should be IBM866 initially"
  );
}
/**
 * Second check passed to runCharsetTest: asserts where U+3042 occurs in the
 * text of the parent document and of its first frame, and that the parent
 * reports "EUC-JP" while the frame reports "Shift_JIS" as characterSet.
 */
function afterChangeCharset() {
  const textOf = win => win.document.documentElement.textContent;
  const charsetOf = win => win.document.characterSet;

  is(
    textOf(content).indexOf("\u3042"),
    131,
    "Parent doc should decode as EUC-JP subsequently"
  );
  is(
    textOf(content.frames[0]).indexOf("\u3042"),
    90,
    "Child doc should decode as Shift_JIS subsequently"
  );

  is(
    charsetOf(content),
    "EUC-JP",
    "Parent doc should report EUC-JP subsequently"
  );
  is(
    charsetOf(content.frames[0]),
    "Shift_JIS",
    "Child doc should report Shift_JIS subsequently"
  );
}

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug1648464-1.html",
afterOpen,
"_autodetect_all",
afterChangeCharset
);
}

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug1688368-1.sjs",
afterOpen,
"UTF-8",
afterChangeCharset
);
}

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug234628-1.html",
afterOpen,
"windows-1251",
afterChangeCharset
);
}
@ -23,25 +24,24 @@ function afterOpen() {
function afterChangeCharset() {
is(
content.document.documentElement.textContent.indexOf("\u20AC"),
content.document.documentElement.textContent.indexOf("\u0402"),
129,
"Parent doc should be windows-1252 subsequently"
"Parent doc should decode as windows-1251 subsequently"
);
is(
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
content.frames[0].document.documentElement.textContent.indexOf("\u0402"),
85,
"Child doc should be windows-1252 subsequently"
"Child doc should decode as windows-1251 subsequently"
);
is(
content.document.characterSet,
"windows-1252",
"Parent doc should report windows-1252 subsequently"
"windows-1251",
"Parent doc should report windows-1251 subsequently"
);
is(
content.frames[0].document.characterSet,
"windows-1252",
"Child doc should report windows-1252 subsequently"
"windows-1251",
"Child doc should report windows-1251 subsequently"
);
}

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug234628-10.html",
afterOpen,
"windows-1251",
afterChangeCharset
);
}
@ -23,9 +24,9 @@ function afterOpen() {
function afterChangeCharset() {
is(
content.document.documentElement.textContent.indexOf("\u20AC"),
content.document.documentElement.textContent.indexOf("\u0402"),
151,
"Parent doc should be windows-1252 initially"
"Parent doc should decode as windows-1251 subsequently"
);
is(
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
@ -35,8 +36,8 @@ function afterChangeCharset() {
is(
content.document.characterSet,
"windows-1252",
"Parent doc should report windows-1252 subsequently"
"windows-1251",
"Parent doc should report windows-1251 subsequently"
);
is(
content.frames[0].document.characterSet,

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug234628-11.html",
afterOpen,
"windows-1251",
afterChangeCharset
);
}
@ -23,9 +24,9 @@ function afterOpen() {
function afterChangeCharset() {
is(
content.document.documentElement.textContent.indexOf("\u20AC"),
content.document.documentElement.textContent.indexOf("\u0402"),
193,
"Parent doc should be windows-1252 subsequently"
"Parent doc should decode as windows-1251 subsequently"
);
is(
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
@ -35,8 +36,8 @@ function afterChangeCharset() {
is(
content.document.characterSet,
"windows-1252",
"Parent doc should report windows-1252 subsequently"
"windows-1251",
"Parent doc should report windows-1251 subsequently"
);
is(
content.frames[0].document.characterSet,

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug234628-2.html",
afterOpen,
"windows-1251",
afterChangeCharset
);
}
@ -25,25 +26,26 @@ function afterOpen() {
function afterChangeCharset() {
is(
content.document.documentElement.textContent.indexOf("\u20AC"),
content.document.documentElement.textContent.indexOf("\u0402"),
129,
"Parent doc should be windows-1252 subsequently"
"Parent doc should decode as windows-1251 subsequently"
);
is(
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
content.frames[0].document.documentElement.textContent.indexOf(
"\u0432\u201A\u00AC"
),
78,
"Child doc should be UTF-8 subsequently"
"Child doc should decode as windows-1251 subsequently"
);
is(
content.document.characterSet,
"windows-1252",
"Parent doc should report windows-1252 subsequently"
"windows-1251",
"Parent doc should report windows-1251 subsequently"
);
is(
content.frames[0].document.characterSet,
"UTF-8",
"Child doc should report UTF-8 subsequently"
"windows-1251",
"Child doc should report windows-1251 subsequently"
);
}

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug234628-3.html",
afterOpen,
"windows-1251",
afterChangeCharset
);
}
@ -23,25 +24,26 @@ function afterOpen() {
function afterChangeCharset() {
is(
content.document.documentElement.textContent.indexOf("\u20AC"),
content.document.documentElement.textContent.indexOf("\u0402"),
118,
"Parent doc should be windows-1252 subsequently"
"Parent doc should decode as windows-1251 subsequently"
);
is(
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
content.frames[0].document.documentElement.textContent.indexOf(
"\u0432\u201A\u00AC"
),
73,
"Child doc should be utf-8 subsequently"
"Child doc should decode as windows-1251 subsequently"
);
is(
content.document.characterSet,
"windows-1252",
"Parent doc should report windows-1252 subsequently"
"windows-1251",
"Parent doc should report windows-1251 subsequently"
);
is(
content.frames[0].document.characterSet,
"UTF-8",
"Child doc should report UTF-8 subsequently"
"windows-1251",
"Child doc should report windows-1251 subsequently"
);
}

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug234628-4.html",
afterOpen,
"windows-1251",
afterChangeCharset
);
}
@ -23,9 +24,9 @@ function afterOpen() {
function afterChangeCharset() {
is(
content.document.documentElement.textContent.indexOf("\u20AC"),
content.document.documentElement.textContent.indexOf("\u0402"),
132,
"Parent doc should decode as windows-1252 subsequently"
"Parent doc should decode as windows-1251 subsequently"
);
is(
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
@ -35,8 +36,8 @@ function afterChangeCharset() {
is(
content.document.characterSet,
"windows-1252",
"Parent doc should report windows-1252 subsequently"
"windows-1251",
"Parent doc should report windows-1251 subsequently"
);
is(
content.frames[0].document.characterSet,

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug234628-5.html",
afterOpen,
"windows-1251",
afterChangeCharset
);
}
@ -23,9 +24,9 @@ function afterOpen() {
function afterChangeCharset() {
is(
content.document.documentElement.textContent.indexOf("\u20AC"),
content.document.documentElement.textContent.indexOf("\u0402"),
146,
"Parent doc should be windows-1252 subsequently"
"Parent doc should decode as windows-1251 subsequently"
);
is(
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
@ -35,8 +36,8 @@ function afterChangeCharset() {
is(
content.document.characterSet,
"windows-1252",
"Parent doc should report windows-1252 subsequently"
"windows-1251",
"Parent doc should report windows-1251 subsequently"
);
is(
content.frames[0].document.characterSet,

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug234628-6.html",
afterOpen,
"windows-1251",
afterChangeCharset
);
}
@ -23,21 +24,20 @@ function afterOpen() {
function afterChangeCharset() {
is(
content.document.documentElement.textContent.indexOf("\u20AC"),
content.document.documentElement.textContent.indexOf("\u0402"),
190,
"Parent doc should be windows-1252 subsequently"
"Parent doc should decode as windows-1251 subsequently"
);
is(
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
109,
"Child doc should be utf-16 subsequently"
"Child doc should decode as utf-16 subsequently"
);
is(
content.document.characterSet,
"windows-1252",
"Parent doc should report windows-1252 subsequently"
"windows-1251",
"Parent doc should report windows-1251 subsequently"
);
is(
content.frames[0].document.characterSet,

Просмотреть файл

@ -0,0 +1,49 @@
/**
 * Entry point: hands the fixture page, the two check callbacks and the
 * "windows-1251" charset label to runCharsetTest.
 */
function test() {
  const baseUrl = "http://mochi.test:8888/browser/docshell/test/browser/";
  const fixture = "file_bug234628-7.html";
  const charsetLabel = "windows-1251";
  runCharsetTest(
    baseUrl + fixture,
    afterOpen,
    charsetLabel,
    afterChangeCharset
  );
}
/**
 * First check passed to runCharsetTest: asserts the offsets at which U+20AC
 * (the euro sign) occurs in the text of the parent document and of its first
 * frame; the messages describe these as the initial windows-1252 (parent)
 * and utf-8 (child) decodings.
 */
function afterOpen() {
  const textOf = win => win.document.documentElement.textContent;

  is(
    textOf(content).indexOf("\u20AC"),
    188,
    "Parent doc should be windows-1252 initially"
  );
  is(
    textOf(content.frames[0]).indexOf("\u20AC"),
    107,
    "Child doc should be utf-8 initially"
  );
}
/**
 * Second check passed to runCharsetTest: asserts the offset of U+0402 in the
 * parent document's text and of the three-character sequence
 * U+0432 U+201A U+00AC in the first frame's text, and that both documents
 * report "windows-1251" as their characterSet.
 */
function afterChangeCharset() {
  const textOf = win => win.document.documentElement.textContent;
  const charsetOf = win => win.document.characterSet;

  is(
    textOf(content).indexOf("\u0402"),
    188,
    "Parent doc should decode as windows-1251 subsequently"
  );
  // Presumably the UTF-8 bytes of the euro sign read as windows-1251.
  is(
    textOf(content.frames[0]).indexOf("\u0432\u201A\u00AC"),
    107,
    "Child doc should decode as windows-1251 subsequently"
  );

  is(
    charsetOf(content),
    "windows-1251",
    "Parent doc should report windows-1251 subsequently"
  );
  is(
    charsetOf(content.frames[0]),
    "windows-1251",
    "Child doc should report windows-1251 subsequently"
  );
}

Просмотреть файл

@ -1,6 +1,6 @@
function test() {
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
runCharsetCheck(rootDir + "file_bug234628-8.html", afterOpen);
runCharsetTest(rootDir + "file_bug234628-8.html", afterOpen);
}
function afterOpen() {

Просмотреть файл

@ -1,6 +1,6 @@
function test() {
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
runCharsetCheck(rootDir + "file_bug234628-9.html", afterOpen);
runCharsetTest(rootDir + "file_bug234628-9.html", afterOpen);
}
function afterOpen() {

Просмотреть файл

@ -3,6 +3,7 @@ function test() {
runCharsetTest(
rootDir + "file_bug673087-2.html",
afterOpen,
"windows-1252",
afterChangeCharset
);
}

Просмотреть файл

@ -34,7 +34,7 @@ function afterOpen() {
/* Test that the content on load is the expected wrong decoding */
testContent(wrongText).then(() => {
BrowserForceEncodingDetection();
BrowserSetForcedCharacterSet("Shift_JIS");
});
}

Просмотреть файл

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
</body>
</html>

Просмотреть файл

@ -0,0 +1,16 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<h1>No encoding declaration in parent or child</h1>
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
<iframe src="file_bug1543077-1-child.html"></iframe>
</body>
</html>

Просмотреть файл

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<p>Hiragana letter a if decoded as Shift_JIS:  </p>
</body>
</html>

Просмотреть файл

@ -0,0 +1,16 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<h1>No encoding declaration in parent or child</h1>
<p>Hiragana letter a if decoded as Shift_JIS:  </p>
<iframe src="file_bug1543077-2-child.html"></iframe>
</body>
</html>

Просмотреть файл

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<p>Hiragana letter a if decoded as Shift_JIS:  </p>
</body>
</html>

Просмотреть файл

@ -0,0 +1,16 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<h1>No encoding declaration in parent or child</h1>
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
<iframe src="file_bug1543077-4-child.html"></iframe>
</body>
</html>

Просмотреть файл

@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>meta declaration in parent and BOMless UTF-8 with HTTP charset in child</title>
</head>
<body>
<p>Euro sign if decoded as UTF-8: €</p>
<p>a with diaeresis if decoded as UTF-8: ä</p>
</body>
</html>

Просмотреть файл

@ -0,0 +1 @@
Content-Type: text/html; charset=utf-8

Просмотреть файл

@ -0,0 +1,18 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="windows-1252">
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>meta declaration in parent and BOMless UTF-8 with HTTP charset in child</title>
</head>
<body>
<h1>meta declaration in parent and BOMless UTF-8 with HTTP charset in child</h1>
<p>Euro sign if decoded as Windows-1252: €</p>
<p>a with diaeresis if decoded as Windows-1252: ä</p>
<iframe src="file_bug234628-7-child.html"></iframe>
</body>
</html>

Просмотреть файл

@ -63,22 +63,34 @@ function timelineTestOpenUrl(url) {
}
/**
* Helper function for encoding override tests, loads URL, runs check1,
* forces encoding detection, runs check2.
* Helper function for charset tests. It loads |url| in a new tab,
* runs |check1| in a ContentTask when the page is ready, switches the
* charset to |charset|, and then runs |check2| in a ContentTask when
* the page has finished reloading.
*
* |charset| and |check2| can be omitted, in which case the test
* finishes when |check1| completes.
*/
function runCharsetTest(url, check1, check2) {
function runCharsetTest(url, check1, charset, check2) {
waitForExplicitFinish();
BrowserTestUtils.openNewForegroundTab(gBrowser, url, true).then(afterOpen);
function afterOpen() {
BrowserTestUtils.browserLoaded(gBrowser.selectedBrowser).then(
afterChangeCharset
);
if (charset) {
BrowserTestUtils.browserLoaded(gBrowser.selectedBrowser).then(
afterChangeCharset
);
SpecialPowers.spawn(gBrowser.selectedBrowser, [], check1).then(() => {
BrowserForceEncodingDetection();
});
SpecialPowers.spawn(gBrowser.selectedBrowser, [], check1).then(() => {
BrowserSetForcedCharacterSet(charset);
});
} else {
SpecialPowers.spawn(gBrowser.selectedBrowser, [], check1).then(() => {
gBrowser.removeCurrentTab();
finish();
});
}
}
function afterChangeCharset() {
@ -89,23 +101,6 @@ function runCharsetTest(url, check1, check2) {
}
}
/**
* Helper function for charset tests. It loads |url| in a new tab,
* runs |check|.
*/
function runCharsetCheck(url, check) {
waitForExplicitFinish();
BrowserTestUtils.openNewForegroundTab(gBrowser, url, true).then(afterOpen);
function afterOpen() {
SpecialPowers.spawn(gBrowser.selectedBrowser, [], check).then(() => {
gBrowser.removeCurrentTab();
finish();
});
}
}
async function pushState(url, frameId) {
info(
`Doing a pushState, expecting to load ${url} ${

Просмотреть файл

@ -362,11 +362,8 @@ nsDOMWindowUtils::GetDocCharsetIsForced(bool* aIsForced) {
*aIsForced = false;
Document* doc = GetDocument();
if (doc) {
auto source = doc->GetDocumentCharacterSetSource();
*aIsForced = source == kCharsetFromInitialUserForcedAutoDetection ||
source == kCharsetFromFinalUserForcedAutoDetection;
}
*aIsForced =
doc && doc->GetDocumentCharacterSetSource() >= kCharsetFromUserForced;
return NS_OK;
}

Просмотреть файл

@ -206,7 +206,7 @@ void nsHTMLDocument::TryUserForcedCharset(nsIContentViewer* aCv,
nsIDocShell* aDocShell,
int32_t& aCharsetSource,
NotNull<const Encoding*>& aEncoding) {
if (aCharsetSource >= kCharsetFromXmlDeclarationUtf16) {
if (kCharsetFromUserForced <= aCharsetSource) {
return;
}
@ -215,11 +215,20 @@ void nsHTMLDocument::TryUserForcedCharset(nsIContentViewer* aCv,
return;
}
if (aDocShell && nsDocShell::Cast(aDocShell)->GetForcedAutodetection()) {
if (aDocShell) {
// This is the Character Encoding menu code path in Firefox
aEncoding = WINDOWS_1252_ENCODING;
aCharsetSource = kCharsetFromPendingUserForcedAutoDetection;
nsDocShell::Cast(aDocShell)->ResetForcedAutodetection();
auto encoding = nsDocShell::Cast(aDocShell)->GetForcedCharset();
if (encoding) {
if (!IsAsciiCompatible(encoding)) {
return;
}
aEncoding = WrapNotNull(encoding);
aCharsetSource = nsDocShell::Cast(aDocShell)->GetForcedAutodetection()
? kCharsetFromPendingUserForcedAutoDetection
: kCharsetFromUserForced;
aDocShell->SetCharset(""_ns);
}
}
}
@ -229,7 +238,7 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
if (!aDocShell) {
return;
}
if (aCharsetSource >= kCharsetFromXmlDeclarationUtf16) {
if (aCharsetSource >= kCharsetFromUserForced) {
return;
}
@ -241,7 +250,9 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
if (!parentCharset) {
return;
}
if (kCharsetFromPendingUserForcedAutoDetection == parentSource ||
if (kCharsetFromUserForced == parentSource ||
kCharsetFromUserForcedJapaneseAutoDetection == parentSource ||
kCharsetFromPendingUserForcedAutoDetection == parentSource ||
kCharsetFromInitialUserForcedAutoDetection == parentSource ||
kCharsetFromFinalUserForcedAutoDetection == parentSource) {
if (WillIgnoreCharsetOverride() ||
@ -250,7 +261,11 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
return;
}
aEncoding = WrapNotNull(parentCharset);
aCharsetSource = kCharsetFromPendingUserForcedAutoDetection;
aCharsetSource =
(kCharsetFromUserForced == parentSource ||
kCharsetFromUserForcedJapaneseAutoDetection == parentSource)
? kCharsetFromUserForced
: kCharsetFromPendingUserForcedAutoDetection;
return;
}
@ -678,9 +693,11 @@ bool nsHTMLDocument::WillIgnoreCharsetOverride() {
switch (mCharacterSetSource) {
case kCharsetUninitialized:
case kCharsetFromFallback:
case kCharsetFromTopLevelDomain:
case kCharsetFromDocTypeDefault:
case kCharsetFromInitialAutoDetectionWouldHaveBeenUTF8:
case kCharsetFromInitialAutoDetectionWouldNotHaveBeenUTF8DependedOnTLD:
case kCharsetFromFinalJapaneseAutoDetection:
case kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8:
case kCharsetFromFinalAutoDetectionWouldNotHaveBeenUTF8DependedOnTLD:
case kCharsetFromParentFrame:
@ -688,6 +705,8 @@ bool nsHTMLDocument::WillIgnoreCharsetOverride() {
case kCharsetFromMetaPrescan:
case kCharsetFromMetaTag:
case kCharsetFromChannel:
case kCharsetFromUserForced:
case kCharsetFromUserForcedJapaneseAutoDetection:
return false;
}

Просмотреть файл

@ -176,6 +176,9 @@ class nsHTMLDocument : public mozilla::dom::Document {
void TryUserForcedCharset(nsIContentViewer* aCv, nsIDocShell* aDocShell,
int32_t& aCharsetSource,
NotNull<const Encoding*>& aEncoding);
static void TryCacheCharset(nsICachingChannel* aCachingChannel,
int32_t& aCharsetSource,
NotNull<const Encoding*>& aEncoding);
void TryParentCharset(nsIDocShell* aDocShell, int32_t& charsetSource,
NotNull<const Encoding*>& aEncoding);

Просмотреть файл

@ -67,6 +67,12 @@ interface nsIBrowser : nsISupports
*/
attribute boolean mayEnableCharacterEncodingMenu;
/**
* Whether or not the character encoding was detected by analyzing
* content (as opposed to reading a protocol label).
*/
attribute boolean charsetAutodetected;
/**
* Called by Gecko to update the browser when its state changes.
*
@ -96,6 +102,8 @@ interface nsIBrowser : nsISupports
* @param aCharset the character set of the document
* @param aMayEnableCharacterEncodingMenu whether or not the content encoding
* menu may be enabled
* @param aCharsetAutodetected whether or not the given character set was
* autodetected
* @param aDocumentURI the URI of the new document
* @param aTitle the title of the new document
* @param aContentPrincipal the security principal of the new document
@ -112,6 +120,7 @@ interface nsIBrowser : nsISupports
void updateForLocationChange(in nsIURI aLocation,
in AString aCharset,
in boolean aMayEnableCharacterEncodingMenu,
in boolean aCharsetAutodetected,
in nsIURI aDocumentURI,
in AString aTitle,
in nsIPrincipal aContentPrincipal,

Просмотреть файл

@ -3602,6 +3602,7 @@ NS_IMETHODIMP BrowserChild::OnStateChange(nsIWebProgress* aWebProgress,
stateChangeData->isNavigating() = docShell->GetIsNavigating();
stateChangeData->mayEnableCharacterEncodingMenu() =
docShell->GetMayEnableCharacterEncodingMenu();
stateChangeData->charsetAutodetected() = docShell->GetCharsetAutodetected();
RefPtr<Document> document = browsingContext->GetExtantDocument();
if (document && aStateFlags & nsIWebProgressListener::STATE_STOP) {
@ -3700,6 +3701,8 @@ NS_IMETHODIMP BrowserChild::OnLocationChange(nsIWebProgress* aWebProgress,
locationChangeData->mayEnableCharacterEncodingMenu() =
docShell->GetMayEnableCharacterEncodingMenu();
locationChangeData->charsetAutodetected() =
docShell->GetCharsetAutodetected();
locationChangeData->contentPrincipal() = document->NodePrincipal();
locationChangeData->contentPartitionedPrincipal() =

Просмотреть файл

@ -2713,6 +2713,8 @@ mozilla::ipc::IPCResult BrowserParent::RecvOnStateChange(
Unused << browser->SetIsNavigating(aStateChangeData->isNavigating());
Unused << browser->SetMayEnableCharacterEncodingMenu(
aStateChangeData->mayEnableCharacterEncodingMenu());
Unused << browser->SetCharsetAutodetected(
aStateChangeData->charsetAutodetected());
Unused << browser->UpdateForStateChange(aStateChangeData->charset(),
aStateChangeData->documentURI(),
aStateChangeData->contentType());
@ -2781,6 +2783,7 @@ mozilla::ipc::IPCResult BrowserParent::RecvOnLocationChange(
Unused << browser->UpdateForLocationChange(
aLocation, aLocationChangeData->charset(),
aLocationChangeData->mayEnableCharacterEncodingMenu(),
aLocationChangeData->charsetAutodetected(),
aLocationChangeData->documentURI(), aLocationChangeData->title(),
aLocationChangeData->contentPrincipal(),
aLocationChangeData->contentPartitionedPrincipal(),

Просмотреть файл

@ -130,6 +130,7 @@ struct WebProgressStateChangeData
{
bool isNavigating;
bool mayEnableCharacterEncodingMenu;
bool charsetAutodetected;
// The following fields are only set when the aStateFlags param passed with
// this struct is |nsIWebProgress.STATE_STOP|.
@ -143,6 +144,7 @@ struct WebProgressLocationChangeData
bool isNavigating;
bool isSyntheticDocument;
bool mayEnableCharacterEncodingMenu;
bool charsetAutodetected;
nsString contentType;
nsString title;
nsString charset;

124
intl/JapaneseDetector.h Normal file
Просмотреть файл

@ -0,0 +1,124 @@
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Mostly copied and pasted from
// third_party/rust/shift_or_euc/src/lib.rs , so
// "top-level directory of this distribution" above refers to
// third_party/rust/shift_or_euc/
#ifndef mozilla_JapaneseDetector_h
#define mozilla_JapaneseDetector_h
#include "mozilla/Encoding.h"
namespace mozilla {
class JapaneseDetector;
}; // namespace mozilla
#define SHIFT_OR_EUC_DETECTOR mozilla::JapaneseDetector
#include "shift_or_euc.h"
namespace mozilla {
/**
* A Japanese legacy encoding detector for detecting between Shift_JIS,
* EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the
* encoding is one of those.
*
* # Principle of Operation
*
* The detector is based on two observations:
*
* 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
* EUC-JP, so encountering such an escape sequence (before non-ASCII has been
* encountered) can be taken as indication of ISO-2022-JP.
* 2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
* decoded as EUC-JP, or vice versa, the result is either an error or
* half-width katakana, and it's very uncommon for Japanese HTML to have
* half-width katakana character before a normal kana or common kanji
* character. Therefore, if decoding as Shift_JIS results in error or
* half-width katakana, the detector decides that the content is EUC-JP, and
* vice versa.
*
* # Failure Modes
*
* The detector gives the wrong answer if the text has a half-width katakana
* character before normal kana or common kanji. Some uncommon kanji are
* undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
*
* The half-width katakana issue is mainly relevant for old 8-bit JIS X
* 0201-only text files that would decode correctly as Shift_JIS but that the
* detector detects as EUC-JP.
*
* The undecidable kanji issue does not realistically show up when a full
* document is fed to the detector, because, realistically, in a full
* document, there is at least one kana or common kanji. It can occur,
* though, if the detector is only run on a prefix of a document and the
* prefix only contains the title of the document. It is possible for
* document title to consist entirely of undecidable kanji. (Indeed,
* Japanese Wikipedia has articles with such titles.) If the detector is
* undecided, a fallback to Shift_JIS should be used.
*
* # Usage from C++
*
* This class is a thin wrapper over the `shift_or_euc_detector_*` functions
* declared in shift_or_euc.h: obtain an instance with `Create()`, pass bytes
* to `Feed()`, and let the returned `UniquePtr` release the instance.
*/
class JapaneseDetector final {
public:
// Intentionally empty: releasing the detector is done by `operator delete`
// below, which forwards to `shift_or_euc_detector_free()`.
~JapaneseDetector() {}
/**
* Class-specific deallocation: hands the pointer to
* `shift_or_euc_detector_free()`, matching the
* `shift_or_euc_detector_new()` call made by `Create()`.
*/
static void operator delete(void* aDetector) {
shift_or_euc_detector_free(reinterpret_cast<JapaneseDetector*>(aDetector));
}
/**
* Instantiates the detector. If `aAllow2022` is `true` the possible
* guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
* `aAllow2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
* and undecided.
*/
static inline UniquePtr<JapaneseDetector> Create(bool aAllow2022) {
UniquePtr<JapaneseDetector> detector(shift_or_euc_detector_new(aAllow2022));
return detector;
}
/**
* Feeds bytes to the detector. If `aLast` is `true` the end of the stream
* is considered to occur immediately after the end of `aBuffer`.
* Otherwise, the stream is expected to continue. `aBuffer` may be empty.
*
* If you're running the detector only on a prefix of a complete
* document, _do not_ pass `aLast` as `true` after the prefix if the
* stream as a whole still contains more content.
*
* Returns `SHIFT_JIS_ENCODING` if the detector guessed
* Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
* guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
* detector guessed ISO-2022-JP (only possible if `true` was passed as
* `aAllow2022` when instantiating the detector). Returns `nullptr` if the
* detector is undecided. If `nullptr` is returned even when passing `true`
* as `aLast`, falling back to Shift_JIS is the best guess for Web
* purposes.
*
* Do not call again after the method has returned non-`nullptr` or after
* the method has been called with `true` as `aLast`. (Asserts if the
* previous sentence isn't adhered to.)
*/
inline const mozilla::Encoding* Feed(Span<const uint8_t> aBuffer,
bool aLast) {
return shift_or_euc_detector_feed(this, aBuffer.Elements(),
aBuffer.Length(), aLast);
}
private:
// Construction and copying are disabled; the only way to obtain an
// instance from C++ is `Create()`.
JapaneseDetector() = delete;
JapaneseDetector(const JapaneseDetector&) = delete;
JapaneseDetector& operator=(const JapaneseDetector&) = delete;
};
}; // namespace mozilla
#endif // mozilla_JapaneseDetector_h

Просмотреть файл

@ -27,12 +27,14 @@ DIRS += [
EXPORTS.mozilla += [
"Encoding.h",
"EncodingDetector.h",
"JapaneseDetector.h",
]
EXPORTS += [
"../third_party/rust/chardetng_c/include/chardetng.h",
"../third_party/rust/encoding_c/include/encoding_rs.h",
"../third_party/rust/encoding_c/include/encoding_rs_statics.h",
"../third_party/rust/shift_or_euc_c/include/shift_or_euc.h",
]

Просмотреть файл

@ -177,6 +177,10 @@ exclude-multi-locale = [
reference = "toolkit/locales/en-US/toolkit/about/*Compat.ftl"
l10n = "{l}toolkit/toolkit/about/*Compat.ftl"
[[paths]]
reference = "toolkit/locales/en-US/chrome/global/charsetMenu.properties"
l10n = "{l}toolkit/chrome/global/charsetMenu.properties"
[[paths]]
reference = "toolkit/locales/en-US/chrome/global/commonDialogs.properties"
l10n = "{l}toolkit/chrome/global/commonDialogs.properties"

Просмотреть файл

@ -5581,6 +5581,24 @@
# Prefs starting with "intl."
#---------------------------------------------------------------------------
# Whether the new encoding detector is enabled for the .jp TLD.
- name: intl.charset.detector.ng.jp.enabled
type: bool
value: true
mirror: always
# Whether the new encoding detector is enabled for the .in TLD.
- name: intl.charset.detector.ng.in.enabled
type: bool
value: true
mirror: always
# Whether the new encoding detector is enabled for the .lk TLD.
- name: intl.charset.detector.ng.lk.enabled
type: bool
value: true
mirror: always
# If true, dispatch the keydown and keyup events on any web apps even during
# composition.
- name: intl.ime.hack.on_any_apps.fire_key_events_for_composition

Просмотреть файл

@ -20,6 +20,7 @@
#include "mozilla/DebugOnly.h"
#include "mozilla/Encoding.h"
#include "mozilla/EncodingDetector.h"
#include "mozilla/JapaneseDetector.h"
#include "mozilla/Likely.h"
#include "mozilla/Maybe.h"
#include "mozilla/SchedulerGroup.h"
@ -306,6 +307,9 @@ int32_t nsHtml5StreamParser::MaybeRollBackSource(int32_t aSource) {
}
void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
if (mJapaneseDetector) {
return;
}
if (aInitial) {
if (!mDetectorHasSeenNonAscii) {
mDetectorHadOnlySeenAsciiWhenFirstGuessing = true;
@ -316,6 +320,7 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
bool forced = (mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection);
MOZ_ASSERT(
mCharsetSource != kCharsetFromFinalJapaneseAutoDetection &&
mCharsetSource != kCharsetFromFinalUserForcedAutoDetection &&
mCharsetSource != kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8 &&
mCharsetSource !=
@ -380,7 +385,8 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
mCharsetSource = MaybeRollBackSource(source);
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
} else {
MOZ_ASSERT(mCharsetSource < kCharsetFromXmlDeclarationUtf16 || forced);
MOZ_ASSERT(mCharsetSource < kCharsetFromFinalJapaneseAutoDetection ||
forced);
// We've already committed to a decoder. Request a reload from the
// docshell.
mTreeBuilder->NeedsCharsetSwitchTo(encoding, source, 0);
@ -401,9 +407,44 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
}
}
/**
* Passes aBuffer to mJapaneseDetector and, once the detector returns an
* encoding, records that result on the parser and the tree builder.
*
* @param aBuffer the bytes to hand to the detector
* @param aLast forwarded to JapaneseDetector::Feed() unchanged
*
* Callers must ensure mJapaneseDetector is non-null; it is dereferenced
* without a check.
*/
void nsHtml5StreamParser::FeedJapaneseDetector(Span<const uint8_t> aBuffer,
bool aLast) {
MOZ_ASSERT(!mDecodingLocalFileWithoutTokenizing);
const Encoding* detected = mJapaneseDetector->Feed(aBuffer, aLast);
if (!detected) {
// Null result: the detector has not decided yet, so leave all state
// untouched.
return;
}
// The detector has produced an answer.
// NOTE(review): DontGuessEncoding() is defined elsewhere; presumably it
// turns off further detector feeding -- confirm.
DontGuessEncoding();
// Choose the charset source to report: the user-forced variant when the
// current source is kCharsetFromUserForced, otherwise the plain one.
int32_t source = kCharsetFromFinalJapaneseAutoDetection;
if (mCharsetSource == kCharsetFromUserForced) {
source = kCharsetFromUserForcedJapaneseAutoDetection;
}
if (detected == mEncoding) {
// The detector agrees with the current encoding; only the source is
// updated.
MOZ_ASSERT(mCharsetSource < source, "Why are we running chardet at all?");
mCharsetSource = source;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
} else if (HasDecoder()) {
// We've already committed to a decoder. Request a reload from the
// docshell.
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(detected), source, 0);
FlushTreeOpsAndDisarmTimer();
Interrupt();
} else {
// Got a confident answer from the sniffing buffer. That code will
// take care of setting up the decoder.
mEncoding = WrapNotNull(detected);
mCharsetSource = source;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
}
}
void nsHtml5StreamParser::FeedDetector(Span<const uint8_t> aBuffer,
bool aLast) {
mDetectorHasSeenNonAscii = mDetector->Feed(aBuffer, aLast);
if (mJapaneseDetector) {
FeedJapaneseDetector(aBuffer, aLast);
} else {
mDetectorHasSeenNonAscii = mDetector->Feed(aBuffer, aLast);
}
}
void nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) {
@ -450,12 +491,13 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
NS_ASSERTION(IsParserThread(), "Wrong thread!");
nsresult rv = NS_OK;
if (mDecodingLocalFileWithoutTokenizing &&
mCharsetSource <= kCharsetFromFallback) {
mCharsetSource <= kCharsetFromTopLevelDomain) {
MOZ_ASSERT(mEncoding != UTF_8_ENCODING);
mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
} else {
if (mCharsetSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8) {
if (!(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
if (mCharsetSource >= kCharsetFromFinalJapaneseAutoDetection) {
if (!(mCharsetSource == kCharsetFromUserForced ||
mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection)) {
DontGuessEncoding();
}
@ -639,7 +681,7 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
uint32_t aCountToSniffingLimit,
bool aEof) {
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
MOZ_ASSERT(mCharsetSource < kCharsetFromXmlDeclarationUtf16,
MOZ_ASSERT(mCharsetSource < kCharsetFromUserForcedJapaneseAutoDetection,
"Should not finalize sniffing with strong decision already made.");
if (mMode == VIEW_SOURCE_XML) {
static const XML_Memory_Handling_Suite memsuite = {
@ -702,9 +744,12 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
}
bool forced = (mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection);
bool forced =
(mCharsetSource == kCharsetFromUserForced ||
mCharsetSource == kCharsetFromUserForcedJapaneseAutoDetection ||
mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection);
if (!mChannelHadCharset &&
(forced || mCharsetSource < kCharsetFromMetaPrescan) &&
(mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {
@ -731,7 +776,17 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
if (forced &&
(encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
// Honor override
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
if (mCharsetSource == kCharsetFromUserForced &&
mEncoding->IsJapaneseLegacy()) {
mFeedChardet = true;
if (!mJapaneseDetector) {
mJapaneseDetector = mozilla::JapaneseDetector::Create(true);
}
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit,
false);
} else if (mCharsetSource ==
kCharsetFromUserForcedJapaneseAutoDetection ||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
DontGuessEncoding();
} else {
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit,
@ -753,7 +808,15 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
}
if (forced && mCharsetSource != kCharsetFromIrreversibleAutoDetection) {
// neither meta nor XML declaration found, honor override
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
if (mCharsetSource == kCharsetFromUserForced &&
mEncoding->IsJapaneseLegacy()) {
mFeedChardet = true;
if (!mJapaneseDetector) {
mJapaneseDetector = mozilla::JapaneseDetector::Create(true);
}
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, false);
} else if (mCharsetSource == kCharsetFromUserForcedJapaneseAutoDetection ||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
DontGuessEncoding();
} else {
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, false);
@ -958,7 +1021,9 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
// this is the last buffer
uint32_t countToSniffingLimit = SNIFFING_BUFFER_SIZE - mSniffingLength;
bool forced =
(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
(mCharsetSource == kCharsetFromUserForced ||
mCharsetSource == kCharsetFromUserForcedJapaneseAutoDetection ||
mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection);
if (!mChannelHadCharset && (mMode == NORMAL || mMode == VIEW_SOURCE_HTML ||
@ -980,7 +1045,18 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
if (forced && (encoding->IsAsciiCompatible() ||
encoding == ISO_2022_JP_ENCODING)) {
// Honor override
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
if (mCharsetSource == kCharsetFromUserForced &&
mEncoding->IsJapaneseLegacy()) {
mFeedChardet = true;
if (!mJapaneseDetector) {
mJapaneseDetector = mozilla::JapaneseDetector::Create(true);
}
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
false);
} else if (mCharsetSource ==
kCharsetFromUserForcedJapaneseAutoDetection ||
mCharsetSource ==
kCharsetFromFinalUserForcedAutoDetection) {
DontGuessEncoding();
} else {
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
@ -1015,7 +1091,9 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
}
if (encoding) {
// meta scan successful; honor overrides unless meta is XSS-dangerous
if ((mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) &&
if ((mCharsetSource == kCharsetFromUserForced ||
mCharsetSource == kCharsetFromUserForcedJapaneseAutoDetection ||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) &&
(encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
// Honor override
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
@ -1215,7 +1293,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
// let's instantiate only if we make it out of this method with the
// intent to use it.
auto detectorCreator = MakeScopeExit([&] {
if (mFeedChardet) {
if (mFeedChardet && !mJapaneseDetector) {
mDetector = mozilla::EncodingDetector::Create();
}
});
@ -1243,7 +1321,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
nsresult rv = GetChannel(getter_AddRefs(channel));
if (NS_SUCCEEDED(rv)) {
isSrcdoc = NS_IsSrcdocChannel(channel);
if (!isSrcdoc && mCharsetSource <= kCharsetFromFallback) {
if (!isSrcdoc && mCharsetSource <= kCharsetFromTopLevelDomain) {
nsCOMPtr<nsIURI> originalURI;
rv = channel->GetOriginalURI(getter_AddRefs(originalURI));
if (NS_SUCCEEDED(rv)) {
@ -1379,8 +1457,44 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
if (!(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection)) {
if (mCharsetSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8) {
DontGuessEncoding();
if (mCharsetSource >= kCharsetFromFinalJapaneseAutoDetection) {
if ((mCharsetSource == kCharsetFromUserForced) &&
mEncoding->IsJapaneseLegacy()) {
// Japanese detector only
if (!mJapaneseDetector) {
mJapaneseDetector = mozilla::JapaneseDetector::Create(true);
}
mGuessEncoding = false;
} else {
DontGuessEncoding();
}
}
// Compute various pref-based special cases
if (!mDecodingLocalFileWithoutTokenizing && mFeedChardet) {
if (mTLD.EqualsLiteral("jp")) {
if (!mJapaneseDetector &&
!StaticPrefs::intl_charset_detector_ng_jp_enabled()) {
mJapaneseDetector = mozilla::JapaneseDetector::Create(true);
}
if (mJapaneseDetector && mEncoding == WINDOWS_1252_ENCODING &&
mCharsetSource <= kCharsetFromTopLevelDomain) {
mCharsetSource = kCharsetFromTopLevelDomain;
mEncoding = SHIFT_JIS_ENCODING;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
}
} else if ((mTLD.EqualsLiteral("in") &&
!StaticPrefs::intl_charset_detector_ng_in_enabled()) ||
(mTLD.EqualsLiteral("lk") &&
!StaticPrefs::intl_charset_detector_ng_lk_enabled())) {
if (mEncoding == WINDOWS_1252_ENCODING &&
mCharsetSource <= kCharsetFromTopLevelDomain) {
// Avoid breaking font hacks that Chrome doesn't break.
mCharsetSource = kCharsetFromTopLevelDomain;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
}
DontGuessEncoding();
}
}
}

Просмотреть файл

@ -45,6 +45,7 @@ class nsIURI;
namespace mozilla {
class EncodingDetector;
class JapaneseDetector;
template <typename T>
class Buffer;
@ -218,6 +219,11 @@ class nsHtml5StreamParser final : public nsISupports {
// Not from an external interface
/**
* Pass a buffer to the JapaneseDetector.
*/
void FeedJapaneseDetector(mozilla::Span<const uint8_t> aBuffer, bool aLast);
/**
* Pass a buffer to the Japanese or Cyrillic detector as appropriate.
*/
@ -652,6 +658,11 @@ class nsHtml5StreamParser final : public nsISupports {
nsCOMPtr<nsIRunnable> mLoadFlusher;
/**
* The Japanese detector.
*/
mozilla::UniquePtr<mozilla::JapaneseDetector> mJapaneseDetector;
/**
* The generic detector.
*/

Просмотреть файл

@ -9,6 +9,7 @@
enum {
kCharsetUninitialized,
kCharsetFromFallback,
kCharsetFromTopLevelDomain,
kCharsetFromDocTypeDefault, // This and up confident for XHR
// Start subdividing source for telemetry purposes
kCharsetFromInitialAutoDetectionASCII,
@ -16,6 +17,7 @@ enum {
kCharsetFromInitialAutoDetectionWouldNotHaveBeenUTF8Generic,
kCharsetFromInitialAutoDetectionWouldNotHaveBeenUTF8Content,
kCharsetFromInitialAutoDetectionWouldNotHaveBeenUTF8DependedOnTLD,
kCharsetFromFinalJapaneseAutoDetection,
// Deliberately no Final version of ASCII
kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8,
kCharsetFromFinalAutoDetectionWouldNotHaveBeenUTF8Generic,
@ -35,6 +37,8 @@ enum {
// later
kCharsetFromInitialUserForcedAutoDetection,
kCharsetFromFinalUserForcedAutoDetection,
kCharsetFromUserForced, // propagates to child frames
kCharsetFromUserForcedJapaneseAutoDetection,
kCharsetFromXmlDeclarationUtf16, // This one is overridden by
// kCharsetFromChannel
kCharsetFromIrreversibleAutoDetection, // This one is overridden by

1
third_party/rust/shift_or_euc/.cargo-checksum.json поставляемый Normal file
Просмотреть файл

@ -0,0 +1 @@
{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"f9f41b76ecbe257a312ab09ed1208189b8dc9952d12d17a216fe2846d1d471c8","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"b7148745a7ef59788e76fbe638d4b41c54dcaa1313a809f4630a020645f892a8","examples/detect.rs":"eb7239ccc802290ef24331db600ca1226198801dd86df86876b4b738ef4b8470","src/lib.rs":"f2a83db125d553af5c6fabae0487ef211aad62f2d93c4418dc510cbd425d472a"},"package":"f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"}

38
third_party/rust/shift_or_euc/CONTRIBUTING.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,38 @@
If you send a pull request / patch, please observe the following.
## Licensing
Since this crate is dual-licensed,
[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
is considered to apply in the sense of Contributions being automatically
under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
That is, by the act of offering a Contribution, you place your Contribution
under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
file. Please do not contribute if you aren't willing or allowed to license your
contributions in this manner.
You are encouraged to dedicate test code that you contribute to the Public
Domain using the CC0 dedication. If you contribute test code that is not
dedicated to the Public Domain, please be sure not to put it in a part of
source code that the comments designate as being dedicated to the Public
Domain.
## Copyright Notices
If you require the addition of your copyright notice, it's up to you to edit in
your notice as part of your Contribution. Not adding a copyright notice is
taken as a waiver of copyright notice.
## Compatibility with Stable Rust
Please ensure that your Contribution compiles with the latest stable-channel
rustc.
## rustfmt
The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
use that version or avoid using `rustfmt` (so as not to reformat all the code).
## Unit tests
Please ensure that `cargo test` succeeds.

9
third_party/rust/shift_or_euc/COPYRIGHT поставляемый Normal file
Просмотреть файл

@ -0,0 +1,9 @@
shift_or_euc is copyright 2018 Mozilla Foundation.
Licensed under the Apache License, Version 2.0
<LICENSE-APACHE or
https://www.apache.org/licenses/LICENSE-2.0> or the MIT
license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
at your option. All files in the project carrying such
notice may not be copied, modified, or distributed except
according to those terms.

30
third_party/rust/shift_or_euc/Cargo.toml поставляемый Normal file
Просмотреть файл

@ -0,0 +1,30 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "shift_or_euc"
version = "0.1.0"
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
description = "Detects among the Japanese legacy encodings"
homepage = "https://docs.rs/shift_or_euc/"
documentation = "https://docs.rs/shift_or_euc/"
readme = "README.md"
keywords = ["encoding", "web", "charset"]
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
license = "MIT/Apache-2.0"
repository = "https://github.com/hsivonen/shift_or_euc"
[dependencies.encoding_rs]
version = "0.8.17"
[dependencies.memchr]
version = "2.2.0"

202
third_party/rust/shift_or_euc/LICENSE-APACHE поставляемый Normal file
Просмотреть файл

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

25
third_party/rust/shift_or_euc/LICENSE-MIT поставляемый Normal file
Просмотреть файл

@ -0,0 +1,25 @@
Copyright (c) 2018 Mozilla Foundation
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

73
third_party/rust/shift_or_euc/README.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,73 @@
# shift_or_euc
[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT)
A Japanese legacy encoding detector for detecting between Shift_JIS, EUC-JP,
and, optionally, ISO-2022-JP _given_ the assumption that the encoding is one
of those.
This detector is generally more accurate (but see below about the failure
mode on half-width katakana) and decides much sooner than machine
learning-based detectors. To decide EUC-JP, machine learning-based detectors
try to gain confidence that the input looks like EUC-JP. To decide EUC-JP,
this detector instead looks for two simple rule-based signs of the input not
being Shift_JIS.
As a consequence of not containing machine learning tables, the binary size
footprint that this crate adds on top of
[`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny.
## Documentation
[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc)
## Licensing
See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
## Sample Program Usage
1. [Install Rust](https://rustup.rs/)
2. `git clone https://github.com/hsivonen/shift_or_euc`
3. `cd shift_or_euc`
4. `cargo run --example detect PATH_TO_FILE`
The program prints one of:
* Shift_JIS
* EUC-JP
* ISO-2022-JP
* Undecided
## Principle of Operation
The detector is based on two observations:
1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
EUC-JP, so encountering such an escape sequence (before non-ASCII has been
encountered) can be taken as indication of ISO-2022-JP.
2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
decoded as EUC-JP, or vice versa, the result is either an error or half-width
katakana, and it's very uncommon for Japanese HTML to have a half-width katakana
character before a normal kana or common kanji character. Therefore, if
decoding as Shift_JIS results in an error or half-width katakana, the detector
decides that the content is EUC-JP, and vice versa.
## Failure Modes
The detector gives the wrong answer if the text has a half-width katakana
character before normal kana or common kanji. Some uncommon kanji are
undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
The half-width katakana issue is mainly relevant for old 8-bit JIS X 0201-only
text files that would decode correctly as Shift_JIS but that the detector
detects as EUC-JP.
The undecidable kanji issue does not realistically show up when a full
document is fed to the detector, because, realistically, in a full document,
there is at least one kana or common kanji. It can occur, though, if the
detector is only run on a prefix of a document and the prefix only contains
the title of the document. It is possible for a document title to consist
entirely of undecidable kanji. (Indeed, Japanese Wikipedia has articles with
such titles.) If the detector is undecided, falling back to Shift_JIS is
typically the better guess for Web purposes.

56
third_party/rust/shift_or_euc/examples/detect.rs поставляемый Normal file
Просмотреть файл

@ -0,0 +1,56 @@
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::fs::File;
use std::io::Read;
use shift_or_euc::Detector;
/// Sample program: prints the detector's guess for the file named by the
/// single command-line argument.
///
/// Prints the name of the guessed encoding, or `Undecided` if the end of the
/// file is reached without a decision. Every failure prints a message to
/// stderr and exits with a distinct negative status code.
fn main() {
    let mut arguments = std::env::args_os();

    // The first argument is the program name; it is only checked for presence.
    if arguments.next().is_none() {
        eprintln!("Error: Program name missing from arguments.");
        std::process::exit(-1);
    }

    let path = match arguments.next() {
        Some(path) => path,
        None => {
            eprintln!("Error: One path argument needed.");
            std::process::exit(-2);
        }
    };

    if arguments.next().is_some() {
        eprintln!("Error: Too many arguments.");
        std::process::exit(-3);
    }

    let mut file = match File::open(path) {
        Ok(file) => file,
        Err(_) => {
            eprintln!("Error: Could not open file.");
            std::process::exit(-4);
        }
    };

    let mut chunk = [0u8; 4096];
    let mut detector = Detector::new(true);
    loop {
        let count = match file.read(&mut chunk[..]) {
            Ok(count) => count,
            Err(_) => {
                eprintln!("Error: Error reading file.");
                std::process::exit(-5);
            }
        };

        // A zero-length read is treated as end of file: the detector is then
        // fed an empty slice together with the end-of-stream flag.
        let at_eof = count == 0;
        match detector.feed(&chunk[..count], at_eof) {
            Some(encoding) => {
                println!("{}", encoding.name());
                return;
            }
            None if at_eof => {
                println!("Undecided");
                return;
            }
            None => {}
        }
    }
}

278
third_party/rust/shift_or_euc/src/lib.rs поставляемый Normal file
Просмотреть файл

@ -0,0 +1,278 @@
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![doc(html_root_url = "https://docs.rs/shift_or_euc/0.1.0")]
//! A Japanese legacy encoding detector for detecting between Shift_JIS,
//! EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the
//! encoding is one of those.
//!
//! This detector is generally more accurate (but see below about the failure
//! mode on half-width katakana) and decides much sooner than machine
//! learning-based detectors. To decide EUC-JP, machine learning-based
//! detectors try to gain confidence that the input looks like EUC-JP. To
//! decide EUC-JP, this detector instead looks for two simple rule-based
//! signs of the input not being Shift_JIS.
//!
//! As a consequence of not containing machine learning tables, the binary
//! size footprint that this crate adds on top of
//! [`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny.
//!
//! # Licensing
//!
//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
//!
//! # Principle of Operation
//!
//! The detector is based on two observations:
//!
//! 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
//! EUC-JP, so encountering such an escape sequence (before non-ASCII has been
//! encountered) can be taken as indication of ISO-2022-JP.
//! 2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
//! decoded as EUC-JP, or vice versa, the result is either an error or
//! half-width katakana, and it's very uncommon for Japanese HTML to have a
//! half-width katakana character before a normal kana or common kanji
//! character. Therefore, if decoding as Shift_JIS results in an error or
//! half-width katakana, the detector decides that the content is EUC-JP, and
//! vice versa.
//!
//! # Failure Modes
//!
//! The detector gives the wrong answer if the text has a half-width katakana
//! character before normal kana or common kanji. Some uncommon kanji are
//! undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
//!
//! The half-width katakana issue is mainly relevant for old 8-bit JIS X
//! 0201-only text files that would decode correctly as Shift_JIS but that the
//! detector detects as EUC-JP.
//!
//! The undecidable kanji issue does not realistically show up when a full
//! document is fed to the detector, because, realistically, in a full
//! document, there is at least one kana or common kanji. It can occur,
//! though, if the detector is only run on a prefix of a document and the
//! prefix only contains the title of the document. It is possible for
//! a document title to consist entirely of undecidable kanji. (Indeed,
//! Japanese Wikipedia has articles with such titles.) If the detector is
//! undecided, falling back to Shift_JIS is typically the better guess for
//! Web purposes.
use encoding_rs::Decoder;
use encoding_rs::DecoderResult;
use encoding_rs::Encoding;
use encoding_rs::EUC_JP;
use encoding_rs::ISO_2022_JP;
use encoding_rs::SHIFT_JIS;
/// Locates the first byte in `buffer` that is either non-ASCII or the
/// escape byte 0x1B.
///
/// Returns the index of whichever of the two occurs first, or
/// `buffer.len()` when the buffer contains neither.
fn find_non_ascii_or_escape(buffer: &[u8]) -> usize {
    let ascii_len = Encoding::ascii_valid_up_to(buffer);
    // Only the ASCII prefix is searched: an escape byte located after the
    // first non-ASCII byte must not win.
    let ascii_prefix = &buffer[..ascii_len];
    memchr::memchr(0x1B, ascii_prefix).unwrap_or(ascii_len)
}
/// Feeds `decoder` one byte (if `last` is `false`) or the end of the stream
/// (if `last` is `true`). `byte` is ignored if `last` is `true`.
///
/// Returns `true` if the encoding hypothesis represented by `decoder` is
/// still viable, or `false` if this step rejected it. The hypothesis is
/// rejected when the decoder reports a malformed sequence, or when the step
/// produced exactly one UTF-16 code unit in the range U+FF61..=U+FF9F
/// (the half-width katakana signal described in the crate documentation).
#[inline(always)]
fn feed_decoder(decoder: &mut Decoder, byte: u8, last: bool) -> bool {
    let mut output = [0u16; 1];
    let input = [byte];
    let (result, _read, written) = decoder.decode_to_utf16_without_replacement(
        if last { b"" } else { &input },
        &mut output,
        last,
    );
    match result {
        // `..=` replaces the deprecated `...` range-pattern syntax. The
        // `written == 1` guard makes sure `output[0]` was actually produced
        // by this call and is not just the zero initializer.
        DecoderResult::InputEmpty => match output[0] {
            0xFF61..=0xFF9F if written == 1 => false,
            _ => true,
        },
        DecoderResult::Malformed(_, _) => false,
        // NOTE(review): the original code treats a full output buffer as
        // impossible when at most one byte is supplied per call; this
        // presumably holds for the Shift_JIS and EUC-JP decoders used here.
        DecoderResult::OutputFull => unreachable!(),
    }
}
/// A detector for detecting the character encoding of input on the
/// precondition that the encoding is a Japanese legacy encoding.
///
/// The detector runs a Shift_JIS decoder and an EUC-JP decoder side by side
/// over the input and, while ISO-2022-JP is still a candidate, also watches
/// for an escape sequence. See `feed` for how a guess is produced.
pub struct Detector {
// Decoder testing the hypothesis that the input is Shift_JIS; `feed`
// answers EUC-JP when this decoder rejects a byte.
shift_jis_decoder: Decoder,
// Decoder testing the hypothesis that the input is EUC-JP; `feed`
// answers Shift_JIS when this decoder rejects a byte.
euc_jp_decoder: Decoder,
// The byte recorded after an escape byte (0x1B) was seen, or 0 while no
// such byte has been recorded yet.
second_byte_in_escape: u8,
// `true` once ISO-2022-JP is no longer a possible guess. Starts out as
// `!allow_2022` and is set by `feed` upon a non-ASCII byte or an escape
// sequence that is not one of the recognized ones.
iso_2022_jp_disqualified: bool,
// `true` once `feed` has encountered an escape byte (0x1B) while
// ISO-2022-JP was still a candidate.
escape_seen: bool,
// `true` once `feed` has returned `Some(_)` or has been called with
// `last` set to `true`; `feed` asserts that this is `false` on entry.
finished: bool,
}
impl Detector {
    /// Creates a new detector.
    ///
    /// With `allow_2022` set to `true`, the detector may guess Shift_JIS,
    /// EUC-JP or ISO-2022-JP, or remain undecided. With `allow_2022` set to
    /// `false`, ISO-2022-JP is never guessed.
    pub fn new(allow_2022: bool) -> Self {
        let shift_jis_decoder = SHIFT_JIS.new_decoder_without_bom_handling();
        let euc_jp_decoder = EUC_JP.new_decoder_without_bom_handling();
        Self {
            shift_jis_decoder,
            euc_jp_decoder,
            second_byte_in_escape: 0,
            iso_2022_jp_disqualified: !allow_2022,
            escape_seen: false,
            finished: false,
        }
    }

    /// Feeds the next chunk of the stream to the detector. `buffer` may be
    /// empty. When `last` is `true`, the stream is considered to end right
    /// after `buffer`; otherwise more input is expected.
    ///
    /// When running the detector on only a prefix of a document, _do not_
    /// pass `true` as `last` at the end of the prefix if the stream as a
    /// whole has more content.
    ///
    /// Returns `Some(encoding_rs::SHIFT_JIS)`, `Some(encoding_rs::EUC_JP)`
    /// or `Some(encoding_rs::ISO_2022_JP)` once the detector has made the
    /// corresponding guess (ISO-2022-JP only if the detector was created
    /// with `allow_2022` set to `true`), and `None` while it is undecided.
    /// If `None` is returned even with `last` set to `true`, falling back to
    /// Shift_JIS is the best guess for Web purposes.
    ///
    /// Do not call this method again once it has returned `Some(_)` or has
    /// been called with `last` set to `true`.
    ///
    /// # Panics
    ///
    /// If called after the method has returned `Some(_)` or after it has
    /// been called with `true` as `last`.
    pub fn feed(&mut self, buffer: &[u8], last: bool) -> Option<&'static Encoding> {
        assert!(
            !self.finished,
            "Tried to used a detector that has finished."
        );
        // Assume this call is the final one; the flag is cleared again only
        // on the path that returns undecided with more input expected.
        self.finished = true;

        let mut pos = 0;
        if !self.iso_2022_jp_disqualified {
            if !self.escape_seen {
                // Skip straight to the first byte that could matter.
                pos = find_non_ascii_or_escape(buffer);
            }
            while pos < buffer.len() {
                let byte = buffer[pos];
                if byte > 0x7F {
                    // Non-ASCII input rules out ISO-2022-JP.
                    self.iso_2022_jp_disqualified = true;
                    break;
                }
                if !self.escape_seen {
                    if byte == 0x1B {
                        self.escape_seen = true;
                    }
                    pos += 1;
                    continue;
                }
                if self.second_byte_in_escape == 0 {
                    // Record the byte that follows the escape byte.
                    self.second_byte_in_escape = byte;
                    pos += 1;
                    continue;
                }
                // The byte after the recorded one settles the question: the
                // pair either completes a recognized escape sequence or
                // rules out ISO-2022-JP.
                let recognized = match (self.second_byte_in_escape, byte) {
                    (0x28, 0x42) | (0x28, 0x4A) | (0x28, 0x49) | (0x24, 0x40) | (0x24, 0x42) => {
                        true
                    }
                    _ => false,
                };
                if recognized {
                    return Some(ISO_2022_JP);
                }
                self.iso_2022_jp_disqualified = true;
                break;
            }
        }

        for &byte in &buffer[pos..] {
            if let Some(guess) = self.feed_both(byte, false) {
                return Some(guess);
            }
        }
        if last {
            return self.feed_both(0, true);
        }
        self.finished = false;
        None
    }

    /// Advances both decoders by one step, the EUC-JP decoder first.
    ///
    /// Returns `Some(SHIFT_JIS)` if the EUC-JP decoder rejected the step (in
    /// which case the Shift_JIS decoder is not advanced), `Some(EUC_JP)` if
    /// the Shift_JIS decoder rejected it, and `None` if both accepted it.
    fn feed_both(&mut self, byte: u8, last: bool) -> Option<&'static Encoding> {
        if !feed_decoder(&mut self.euc_jp_decoder, byte, last) {
            Some(SHIFT_JIS)
        } else if !feed_decoder(&mut self.shift_jis_decoder, byte, last) {
            Some(EUC_JP)
        } else {
            None
        }
    }
}
// Any copyright to the test code below this comment is dedicated to the
// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh ISO-2022-JP-enabled detector over `input`, treating it
    /// as the complete stream.
    fn detect_whole(input: &[u8]) -> Option<&'static Encoding> {
        Detector::new(true).feed(input, true)
    }

    #[test]
    fn test_iso_2022_jp() {
        assert_eq!(detect_whole(b"abc\x1B\x28\x42\xFF"), Some(ISO_2022_JP));
    }

    #[test]
    fn test_error_precedence() {
        assert_eq!(detect_whole(b"abc\xFF"), Some(SHIFT_JIS));
    }

    #[test]
    fn test_invalid_euc_jp() {
        assert_eq!(detect_whole(b"abc\x81\x40"), Some(SHIFT_JIS));
    }

    #[test]
    fn test_invalid_shift_jis() {
        assert_eq!(detect_whole(b"abc\xEB\xA8"), Some(EUC_JP));
    }

    #[test]
    fn test_invalid_shift_jis_before_invalid_euc_jp() {
        assert_eq!(detect_whole(b"abc\xEB\xA8\x81\x40"), Some(EUC_JP));
    }

    #[test]
    fn test_undecided() {
        // Pure ASCII chunks with more input expected never produce a guess.
        let mut detector = Detector::new(true);
        for _ in 0..2 {
            assert_eq!(detector.feed(b"abc", false), None);
        }
    }
}

1
third_party/rust/shift_or_euc_c/.cargo-checksum.json поставляемый Normal file
Просмотреть файл

@ -0,0 +1 @@
{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"342e5345f4fb433b89f397b07e4e7162376b30cbbc1d6f6ccb11523116e6ed6b","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"a323f1f4537bc7b3f9b3b216c8ac5041b83aa0321f5349a52627aade947c6272","include/shift_or_euc.h":"47c3b9832cb7eb8995aa37dcc2e76be7d4f5c7b3fa6b43135e579831ab449cd8","src/lib.rs":"cab1898dd6724e0a0324a1e44f6348c107f13916da8873dba69c70dbc95ba9cd"},"package":"c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"}

38
third_party/rust/shift_or_euc_c/CONTRIBUTING.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,38 @@
If you send a pull request / patch, please observe the following.
## Licensing
Since this crate is dual-licensed,
[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
is considered to apply in the sense of Contributions being automatically
under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
That is, by the act of offering a Contribution, you place your Contribution
under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
file. Please do not contribute if you aren't willing or allowed to license your
contributions in this manner.
You are encouraged to dedicate test code that you contribute to the Public
Domain using the CC0 dedication. If you contribute test code that is not
dedicated to the Public Domain, please be sure not to put it in a part of
source code that the comments designate as being dedicated to the Public
Domain.
## Copyright Notices
If you require the addition of your copyright notice, it's up to you to edit in
your notice as part of your Contribution. Not adding a copyright notice is
taken as a waiver of copyright notice.
## Compatibility with Stable Rust
Please ensure that your Contribution compiles with the latest stable-channel
rustc.
## rustfmt
The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
use that version or avoid using `rustfmt` (so as not to reformat all the code).
## Unit tests
Please ensure that `cargo test` succeeds.

9
third_party/rust/shift_or_euc_c/COPYRIGHT поставляемый Normal file
Просмотреть файл

@ -0,0 +1,9 @@
shift_or_euc is copyright 2018 Mozilla Foundation.
Licensed under the Apache License, Version 2.0
<LICENSE-APACHE or
https://www.apache.org/licenses/LICENSE-2.0> or the MIT
license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
at your option. All files in the project carrying such
notice may not be copied, modified, or distributed except
according to those terms.

30
third_party/rust/shift_or_euc_c/Cargo.toml поставляемый Normal file
Просмотреть файл

@ -0,0 +1,30 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "shift_or_euc_c"
version = "0.1.0"
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
description = "C API for shift_or_euc"
homepage = "https://docs.rs/shift_or_euc_c/"
documentation = "https://docs.rs/shift_or_euc_c/"
readme = "README.md"
keywords = ["encoding", "web", "charset"]
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
license = "MIT/Apache-2.0"
repository = "https://github.com/hsivonen/shift_or_euc_c"
[dependencies.encoding_rs]
version = "0.8.17"
[dependencies.shift_or_euc]
version = "0.1.0"

202
third_party/rust/shift_or_euc_c/LICENSE-APACHE поставляемый Normal file
Просмотреть файл

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

25
third_party/rust/shift_or_euc_c/LICENSE-MIT поставляемый Normal file
Просмотреть файл

@ -0,0 +1,25 @@
Copyright (c) 2018 Mozilla Foundation
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

13
third_party/rust/shift_or_euc_c/README.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,13 @@
# shift_or_euc_c
[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT)
C API for [`shift_or_euc`](https://docs.rs/crate/shift_or_euc).
## Documentation
[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc_c)
## Licensing
See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT).

88
third_party/rust/shift_or_euc_c/include/shift_or_euc.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,88 @@
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#ifndef shift_or_euc_h
#define shift_or_euc_h
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <stdbool.h>
#include "encoding_rs.h"
// The name used for the detector type in the declarations below can be
// overridden by defining SHIFT_OR_EUC_DETECTOR before including this
// header. When it is not defined, it defaults to `Detector`.
#ifndef SHIFT_OR_EUC_DETECTOR
#define SHIFT_OR_EUC_DETECTOR Detector
// For C, declare `Detector` as an opaque (incomplete) struct type so that
// pointers to it can be passed around. No declaration is emitted here when
// compiling as C++.
#ifndef __cplusplus
typedef struct Detector_ Detector;
#endif
#endif
/// Instantiates the detector. If `allow_2022` is `true` the possible
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
/// and undecided.
///
/// The instantiated detector must be freed after use using
/// `shift_or_euc_detector_free`.
SHIFT_OR_EUC_DETECTOR* shift_or_euc_detector_new(bool allow_2022);
/// Deallocates a detector obtained from `shift_or_euc_detector_new`.
void shift_or_euc_detector_free(SHIFT_OR_EUC_DETECTOR* detector);
/// Feeds bytes to the detector. If `last` is `true` the end of the stream
/// is considered to occur immediately after the end of `buffer`.
/// Otherwise, the stream is expected to continue. `buffer_len` may be zero.
/// `buffer` must not be `NULL` but need not be dereferenceable when
/// `buffer_len` is zero.
///
/// If you're running the detector only on a prefix of a complete
/// document, _do not_ pass `last` as `true` after the prefix if the
/// stream as a whole still contains more content.
///
/// Returns `SHIFT_JIS_ENCODING` if the detector guessed
/// Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
/// guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
/// `allow_2022` when instantiating the detector). Returns `NULL` if the
/// detector is undecided. If `NULL` is returned even when passing `true`
/// as `last`, falling back to Shift_JIS is the best guess for Web
/// purposes.
///
/// Do not call again after the function has returned non-`NULL` or after
/// the function has been called with `true` as `last`.
///
/// # Panics
///
/// If called after the function has returned non-`NULL` or after the
/// function has been called with `true` as `last`.
///
/// # Undefined Behavior
///
/// UB ensues if
///
/// * `detector` does not point to a detector obtained from
/// `shift_or_euc_detector_new` but not yet freed with
/// `shift_or_euc_detector_free`.
/// * `buffer` is `NULL`.
/// * `buffer` and `buffer_len` don't designate a range of memory
/// valid for reading.
ENCODING_RS_ENCODING const* shift_or_euc_detector_feed(
SHIFT_OR_EUC_DETECTOR* detector,
uint8_t const* buffer,
size_t buffer_len,
bool last
);
#ifdef __cplusplus
}
#endif
#endif // shift_or_euc_h

94
third_party/rust/shift_or_euc_c/src/lib.rs поставляемый Normal file
Просмотреть файл

@ -0,0 +1,94 @@
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![doc(html_root_url = "https://docs.rs/shift_or_euc_c/0.1.0")]
//! C API for [`shift_or_euc`](https://docs.rs/shift_or_euc/)
//!
//! # Panics
//!
//! This crate is designed to be used only in a `panic=abort` scenario.
//! Panic propagation across FFI is not handled!
//!
//! # Licensing
//!
//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
use encoding_rs::Encoding;
use shift_or_euc::*;
/// Creates a new detector on the heap and returns a raw pointer to it.
///
/// When `allow_2022` is `true`, the detector may guess Shift_JIS, EUC-JP,
/// or ISO-2022-JP, or remain undecided. When `allow_2022` is `false`, the
/// possible guesses are limited to Shift_JIS and EUC-JP (or undecided).
///
/// Ownership of the allocation passes to the caller, who must release it
/// with `shift_or_euc_detector_free` once done.
#[no_mangle]
pub unsafe extern "C" fn shift_or_euc_detector_new(allow_2022: bool) -> *mut Detector {
    let boxed_detector = Box::new(Detector::new(allow_2022));
    Box::into_raw(boxed_detector)
}
/// Releases a detector previously returned by `shift_or_euc_detector_new`.
///
/// The pointer is turned back into a `Box`, which is dropped immediately.
#[no_mangle]
pub unsafe extern "C" fn shift_or_euc_detector_free(detector: *mut Detector) {
    drop(Box::from_raw(detector));
}
/// Passes the next chunk of the byte stream to the detector.
///
/// `last` signals whether the stream ends right after `buffer`: pass `true`
/// only for the final chunk, `false` while more input is expected.
/// `buffer_len` may be zero. `buffer` must never be `NULL`, but it need not
/// be dereferenceable when `buffer_len` is zero.
///
/// When only a prefix of a complete document is being examined, _do not_
/// pass `last` as `true` at the end of the prefix if the stream as a whole
/// still has more content.
///
/// # Return value
///
/// * `SHIFT_JIS_ENCODING` if the detector guessed Shift_JIS.
/// * `EUC_JP_ENCODING` if the detector guessed EUC-JP.
/// * `ISO_2022_JP_ENCODING` if the detector guessed ISO-2022-JP (only
///   possible if `true` was passed as `allow_2022` when instantiating the
///   detector).
/// * `NULL` if the detector is undecided. If `NULL` is returned even when
///   `last` is `true`, falling back to Shift_JIS is the best guess for Web
///   purposes.
///
/// Do not call again after the function has returned non-`NULL` or after
/// it has been called with `true` as `last`.
///
/// # Panics
///
/// If called after the function has returned non-`NULL` or after the
/// function has been called with `true` as `last`.
///
/// # Undefined Behavior
///
/// UB ensues if
///
/// * `detector` does not point to a detector obtained from
///   `shift_or_euc_detector_new` but not yet freed with
///   `shift_or_euc_detector_free`.
/// * `buffer` is `NULL`.
/// * `buffer` and `buffer_len` don't designate a range of memory
///   valid for reading.
#[no_mangle]
pub unsafe extern "C" fn shift_or_euc_detector_feed(
    detector: *mut Detector,
    buffer: *const u8,
    buffer_len: usize,
    last: bool,
) -> *const Encoding {
    // View the caller's memory as a byte slice for the duration of the call.
    let bytes = ::std::slice::from_raw_parts(buffer, buffer_len);
    match (*detector).feed(bytes, last) {
        Some(encoding) => encoding as *const Encoding,
        // Undecided: reported to C callers as a null pointer.
        None => ::std::ptr::null(),
    }
}

Просмотреть файл

@ -52,19 +52,20 @@ class ViewSourceChild extends JSWindowActorChild {
* loading.
*/
viewSource(URL, outerWindowID, lineNumber) {
let otherDocShell;
let forceEncodingDetection = false;
let otherDocShell, forcedCharSet;
if (outerWindowID) {
let contentWindow = Services.wm.getOuterWindowWithId(outerWindowID);
if (contentWindow) {
otherDocShell = contentWindow.docShell;
forceEncodingDetection = contentWindow.windowUtils.docCharsetIsForced;
let utils = contentWindow.windowUtils;
let doc = contentWindow.document;
forcedCharSet = utils.docCharsetIsForced ? doc.characterSet : null;
}
}
this.loadSource(URL, otherDocShell, lineNumber, forceEncodingDetection);
this.loadSource(URL, otherDocShell, lineNumber, forcedCharSet);
}
/**
@ -104,14 +105,18 @@ class ViewSourceChild extends JSWindowActorChild {
* @param lineNumber (optional)
* The line number to focus as soon as the source has finished
* loading.
* @param forceEncodingDetection (optional)
* Force autodetection of the character encoding.
* @param forcedCharSet (optional)
* The document character set to use instead of the default one.
*/
loadSource(URL, otherDocShell, lineNumber, forceEncodingDetection) {
loadSource(URL, otherDocShell, lineNumber, forcedCharSet) {
const viewSrcURL = "view-source:" + URL;
if (forceEncodingDetection) {
this.docShell.forceEncodingDetection();
if (forcedCharSet) {
try {
this.docShell.charset = forcedCharSet;
} catch (e) {
/* invalid charset */
}
}
ViewSourcePageChild.setInitialLineNumber(lineNumber);

Просмотреть файл

@ -255,6 +255,8 @@
this._mayEnableCharacterEncodingMenu = null;
this._charsetAutodetected = false;
this._contentPrincipal = null;
this._contentPartitionedPrincipal = null;
@ -583,11 +585,17 @@
: this.contentDocument.title;
}
forceEncodingDetection() {
set characterSet(val) {
if (this.isRemoteBrowser) {
this.sendMessageToActor("ForceEncodingDetection", {}, "BrowserTab");
this.sendMessageToActor(
"UpdateCharacterSet",
{ value: val },
"BrowserTab"
);
this._characterSet = val;
} else {
this.docShell.forceEncodingDetection();
this.docShell.charset = val;
this.docShell.gatherCharsetMenuTelemetry();
}
}
@ -607,6 +615,18 @@
}
}
get charsetAutodetected() {
return this.isRemoteBrowser
? this._charsetAutodetected
: this.docShell.charsetAutodetected;
}
set charsetAutodetected(aAutodetected) {
if (this.isRemoteBrowser) {
this._charsetAutodetected = aAutodetected;
}
}
get contentPrincipal() {
return this.isRemoteBrowser
? this._contentPrincipal
@ -1127,6 +1147,7 @@
aLocation,
aCharset,
aMayEnableCharacterEncodingMenu,
aCharsetAutodetected,
aDocumentURI,
aTitle,
aContentPrincipal,
@ -1142,6 +1163,7 @@
if (aCharset != null) {
this._characterSet = aCharset;
this._mayEnableCharacterEncodingMenu = aMayEnableCharacterEncodingMenu;
this._charsetAutodetected = aCharsetAutodetected;
}
if (aContentType != null) {
@ -1556,6 +1578,7 @@
"_documentContentType",
"_characterSet",
"_mayEnableCharacterEncodingMenu",
"_charsetAutodetected",
"_contentPrincipal",
"_contentPartitionedPrincipal",
"_isSyntheticDocument",

Просмотреть файл

@ -39,6 +39,7 @@ cert_storage = { path = "../../../../security/manager/ssl/cert_storage" }
bitsdownload = { path = "../../../components/bitsdownload", optional = true }
storage = { path = "../../../../storage/rust" }
bookmark_sync = { path = "../../../components/places/bookmark_sync", optional = true }
shift_or_euc_c = "0.1.0"
chardetng_c = "0.1.1"
audio_thread_priority = "0.23.4"
mdns_service = { path="../../../../dom/media/webrtc/transport/mdns_service", optional = true }

Просмотреть файл

@ -45,6 +45,7 @@ extern crate processtools;
#[cfg(feature = "gecko_profiler")]
extern crate profiler_helper;
extern crate rsdparsa_capi;
extern crate shift_or_euc_c;
extern crate static_prefs;
extern crate storage;
#[cfg(feature = "quantum_render")]

Просмотреть файл

@ -0,0 +1,5 @@
<!-- This Source Code Form is subject to the terms of the Mozilla Public
- License, v. 2.0. If a copy of the MPL was not distributed with this
- file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
<!ENTITY charsetMenu2.label "Text Encoding">

Просмотреть файл

@ -0,0 +1,114 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# LOCALIZATION NOTE: The property keys ending with ".key" are for access keys.
# Localizations may add or delete properties where the property key ends with
# ".key" as appropriate for the localization. The code that uses this data can
# deal with the absence of an access key for an item.
#
# For gbk, gbk.bis and gbk.bis.key are used to trigger string changes in
# localizations.
#
# In the en-US version of this file, access keys are given to the following:
# * UTF-8
# * All encodings that are the fallback encoding for some locale in Firefox
# * All encodings that are the fallback encoding for some locale in IE
# * All Japanese encodings
#
# For the items whose property key does not end in ".key" and whose value
# includes "(" U+0028 LEFT PARENTHESIS, the "(" character is significant for
# processing by CharsetMenu.jsm. If your localization does not use ASCII
# parentheses where en-US does in this file, please file a bug to make
# CharsetMenu.jsm also recognize the delimiter your localization uses.
# (When this code was developed, all localizations appeared to use
# U+0028 LEFT PARENTHESIS for this purpose.)
# Globally-relevant
_autodetect_all.key = m
_autodetect_all = Automatic
UTF-8.key = U
UTF-8 = Unicode
windows-1252.key = W
windows-1252 = Western
# Arabic
windows-1256.key = A
windows-1256 = Arabic (Windows)
ISO-8859-6 = Arabic (ISO)
# Baltic
windows-1257.key = B
windows-1257 = Baltic (Windows)
ISO-8859-4 = Baltic (ISO)
# Central European
windows-1250.key = E
windows-1250 = Central European (Windows)
ISO-8859-2.key = l
ISO-8859-2 = Central European (ISO)
# Chinese, Simplified
gbk.bis.key = S
gbk.bis = Chinese, Simplified
# Chinese, Traditional
Big5.key = T
Big5 = Chinese, Traditional
# Cyrillic
windows-1251.key = C
windows-1251 = Cyrillic (Windows)
ISO-8859-5 = Cyrillic (ISO)
KOI8-R = Cyrillic (KOI8-R)
KOI8-U = Cyrillic (KOI8-U)
IBM866 = Cyrillic (DOS)
# UI string in anticipation of Cyrillic analog of bug 1543077;
# deliberately not in use yet
# LOCALIZATION NOTE (Cyrillic.key): If taken into use, this string will appear
# instead of the string for windows-1251.key, so the use of the same
# accelerator is deliberate.
Cyrillic.key = C
# LOCALIZATION NOTE (Cyrillic): If taken into use, this string will appear
# as a single item instead of the five items windows-1251, ISO-8859-5,
# KOI8-R, KOI8-U, and IBM866, so this string does not need to make sense
# together with those strings and should be translated the way those were
# but omitting the part in parentheses.
Cyrillic = Cyrillic
# Greek
windows-1253.key = G
windows-1253 = Greek (Windows)
ISO-8859-7.key = O
ISO-8859-7 = Greek (ISO)
# Hebrew
windows-1255.key = H
windows-1255 = Hebrew
# LOCALIZATION NOTE (ISO-8859-8): The value for this item should begin with
# the same word for Hebrew as the value for windows-1255 so that this item
# sorts right after that one in the collation order for your locale.
ISO-8859-8 = Hebrew, Visual
# Japanese (NOT AN ENCODING NAME)
Japanese.key = J
Japanese = Japanese
# Korean
EUC-KR.key = K
EUC-KR = Korean
# Thai
windows-874.key = i
windows-874 = Thai
# Turkish
windows-1254.key = r
windows-1254 = Turkish
# Vietnamese
windows-1258.key = V
windows-1258 = Vietnamese

Просмотреть файл

@ -14,6 +14,8 @@
locale/@AB_CD@/global/autocomplete.properties (%chrome/global/autocomplete.properties)
locale/@AB_CD@/global/appPicker.dtd (%chrome/global/appPicker.dtd)
locale/@AB_CD@/global/browser.properties (%chrome/global/browser.properties)
locale/@AB_CD@/global/charsetMenu.dtd (%chrome/global/charsetMenu.dtd)
locale/@AB_CD@/global/charsetMenu.properties (%chrome/global/charsetMenu.properties)
locale/@AB_CD@/global/commonDialog.dtd (%chrome/global/commonDialog.dtd)
locale/@AB_CD@/global/commonDialogs.properties (%chrome/global/commonDialogs.properties)
locale/@AB_CD@/global/contentAreaCommands.properties (%chrome/global/contentAreaCommands.properties)

Просмотреть файл

@ -0,0 +1,223 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
var EXPORTED_SYMBOLS = ["CharsetMenu"];
const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
const { XPCOMUtils } = ChromeUtils.import(
"resource://gre/modules/XPCOMUtils.jsm"
);
XPCOMUtils.defineLazyGetter(this, "gBundle", function() {
const kUrl = "chrome://global/locale/charsetMenu.properties";
return Services.strings.createBundle(kUrl);
});
ChromeUtils.defineModuleGetter(
this,
"Deprecated",
"resource://gre/modules/Deprecated.jsm"
);
/**
* This set contains encodings that are in the Encoding Standard, except:
* - Japanese encodings are represented by one autodetection item
* - x-user-defined, which practically never makes sense as an end-user-chosen
* override.
* - Encodings that IE11 doesn't have in its corresponding menu.
*
* The entries named in kPinned (defined right after this set) are deleted
* from the set once kPinned exists, so what remains here is the non-pinned
* part of the menu. Commented-out names are encodings that are deliberately
* hidden; the reason is given next to each one.
*/
const kEncodings = new Set([
// Globally relevant
"_autodetect_all", // (NOT AN ENCODING NAME; using IE-consistent magic name)
"UTF-8",
"windows-1252",
// Arabic
"windows-1256",
"ISO-8859-6",
// Baltic
"windows-1257",
"ISO-8859-4",
// "ISO-8859-13", // Hidden since not in menu in IE11
// Central European
"windows-1250",
"ISO-8859-2",
// Chinese, Simplified
"GBK",
// Chinese, Traditional
"Big5",
// Cyrillic
"windows-1251",
"ISO-8859-5",
"KOI8-R",
"KOI8-U",
"IBM866", // Not in menu in Chromium. Maybe drop this?
// "x-mac-cyrillic", // Not in menu in IE11 or Chromium.
// Greek
"windows-1253",
"ISO-8859-7",
// Hebrew
"windows-1255",
"ISO-8859-8",
// Japanese (NOT AN ENCODING NAME)
"Japanese",
// Korean
"EUC-KR",
// Thai
"windows-874",
// Turkish
"windows-1254",
// Vietnamese
"windows-1258",
// Hiding rare European encodings that aren't in the menu in IE11 and would
// make the menu messy by sorting all over the place
// "ISO-8859-3",
// "ISO-8859-10",
// "ISO-8859-14",
// "ISO-8859-15",
// "ISO-8859-16",
// "macintosh"
]);
// Entries shown first in the menu, in exactly this order, with a separator
// after them.
const kPinned = ["_autodetect_all", "UTF-8", "windows-1252"];
// Pinned entries get their own section, so take them out of the set that
// feeds the remaining part of the menu.
for (const pinnedCharset of kPinned) {
  kEncodings.delete(pinnedCharset);
}
/**
 * Sort comparator for charset menu entries.
 *
 * Entries are ordered primarily by their label with any parenthesized
 * qualifier removed (so "Cyrillic (Windows)" and "Cyrillic (ISO)" share the
 * title "Cyrillic "), using locale-aware comparison. Entries with the same
 * title are then ordered by encoding name in reverse, which places the
 * "windows-*" encodings ahead of their "ISO-*" / "KOI8-*" / "IBM*" siblings.
 *
 * The title and the encoding name are compared separately on purpose: mixing
 * the encoding name into the title string lets the name influence the
 * primary ordering whenever one title is a prefix of another (for example
 * "Hebrew" and "Hebrew, Visual").
 *
 * @param {{label: string, value: string}} a
 *        First entry; `label` is the display string, `value` the encoding
 *        name.
 * @param {{label: string, value: string}} b
 *        Second entry, same shape as `a`.
 * @returns {number}
 *        Negative if `a` sorts before `b`, positive if after, 0 if equal.
 */
function CharsetComparator(a, b) {
  // Normal sorting sorts the part in parenthesis in an order that
  // happens to make the less frequently-used items first, so drop it.
  const titleA = a.label.replace(/\(.*/, "");
  const titleB = b.label.replace(/\(.*/, "");
  // Secondarily reverse sort by encoding name to sort "windows" first.
  return titleA.localeCompare(titleB) || b.value.localeCompare(a.value);
}
// Lazily-populated descriptor lists for the sorted and the pinned menu
// sections; both are filled in by CharsetMenu._ensureDataReady().
var gCharsetInfoCache, gPinnedInfoCache;

/**
 * Helpers for building the character-encoding menu and for mapping a
 * document's encoding onto the menu entry that represents it.
 */
var CharsetMenu = {
  /**
   * Fills `parent` with one radio menuitem per pinned charset, a separator,
   * and then one radio menuitem per remaining charset. Does nothing if
   * `parent` already has child nodes.
   *
   * @param parent
   *        Element that receives the menu items; its ownerDocument is used
   *        to create them.
   * @param deprecatedShowAccessKeys (optional)
   *        Passing a falsy value only triggers a deprecation warning.
   */
  build(parent, deprecatedShowAccessKeys = true) {
    if (!deprecatedShowAccessKeys) {
      Deprecated.warning(
        "CharsetMenu no longer supports building a menu with no access keys.",
        "https://bugzilla.mozilla.org/show_bug.cgi?id=1088710"
      );
    }

    // Charset menu already built
    if (parent.hasChildNodes()) {
      return;
    }

    this._ensureDataReady();
    const ownerDoc = parent.ownerDocument;

    // Creates the menuitem for one descriptor and attaches it to the parent.
    const appendItem = info => {
      const item = ownerDoc.createXULElement("menuitem");
      item.setAttribute("type", "radio");
      item.setAttribute("name", info.name + "Group");
      item.setAttribute(info.name, info.value);
      item.setAttribute("label", info.label);
      if (info.accesskey) {
        item.setAttribute("accesskey", info.accesskey);
      }
      parent.appendChild(item);
    };

    for (const pinnedInfo of gPinnedInfoCache) {
      appendItem(pinnedInfo);
    }
    parent.appendChild(ownerDoc.createXULElement("menuseparator"));
    for (const charsetInfo of gCharsetInfoCache) {
      appendItem(charsetInfo);
    }
  },

  /**
   * Returns the cached descriptor lists, building them first if needed.
   *
   * @returns {{pinnedCharsets: Array, otherCharsets: Array}}
   */
  getData() {
    this._ensureDataReady();
    const pinnedCharsets = gPinnedInfoCache;
    const otherCharsets = gCharsetInfoCache;
    return { pinnedCharsets, otherCharsets };
  },

  /**
   * Builds both descriptor caches on first use; later calls are no-ops.
   */
  _ensureDataReady() {
    if (gCharsetInfoCache) {
      return;
    }
    gPinnedInfoCache = this.getCharsetInfo(kPinned, false);
    gCharsetInfoCache = this.getCharsetInfo(kEncodings);
  },

  /**
   * Turns a collection of charset names into menu entry descriptors of the
   * form { label, accesskey, name: "charset", value }.
   *
   * @param charsets
   *        Collection of charset names, converted with Array.from.
   * @param sort (optional)
   *        Whether to sort the result with CharsetComparator. Defaults to
   *        true.
   * @returns {Array} The descriptor list.
   */
  getCharsetInfo(charsets, sort = true) {
    const describe = charset => {
      const label = this._getCharsetLabel(charset);
      const accesskey = this._getCharsetAccessKey(charset);
      return { label, accesskey, name: "charset", value: charset };
    };
    const descriptors = Array.from(charsets, describe);
    return sort ? descriptors.sort(CharsetComparator) : descriptors;
  },

  /**
   * Looks up the localized label for a charset. If the lookup throws, the
   * lookup key itself is returned.
   */
  _getCharsetLabel(charset) {
    // Localization key has been revised
    const bundleKey = charset == "GBK" ? "gbk.bis" : charset;
    try {
      return gBundle.GetStringFromName(bundleKey);
    } catch (ex) {
      // The lookup failed; use the key as the label.
      return bundleKey;
    }
  },

  /**
   * Looks up the localized access key for a charset. If the lookup throws,
   * an empty string is returned.
   */
  _getCharsetAccessKey(charset) {
    // Localization key has been revised
    const bundleKey = charset == "GBK" ? "gbk.bis" : charset;
    try {
      return gBundle.GetStringFromName(bundleKey + ".key");
    } catch (ex) {
      // The lookup failed; this entry gets no access key.
      return "";
    }
  },

  /**
   * For substantially similar encodings, treat two encodings as the same
   * for the purpose of the check mark.
   *
   * @param charset
   *        The encoding name to fold.
   * @param isAutodetected
   *        When truthy, the three Japanese encodings fold to the single
   *        "Japanese" menu entry.
   * @returns The name of the menu entry that stands for `charset`.
   */
  foldCharset(charset, isAutodetected) {
    if (
      isAutodetected &&
      (charset === "Shift_JIS" ||
        charset === "EUC-JP" ||
        charset === "ISO-2022-JP")
    ) {
      return "Japanese";
    }
    if (charset === "ISO-8859-8-I") {
      return "windows-1255";
    }
    if (charset === "gb18030") {
      return "GBK";
    }
    return charset;
  },

  /**
   * This method is for comm-central callers only.
   *
   * Marks as checked the first item under `parent` whose "charset"
   * attribute matches the folded form of `charset`, if there is one.
   */
  update(parent, charset) {
    const foldedCharset = this.foldCharset(charset, false);
    const matches = parent.getElementsByAttribute("charset", foldedCharset);
    const menuitem = matches.item(0);
    if (menuitem) {
      menuitem.setAttribute("checked", "true");
    }
  },
};

Object.freeze(CharsetMenu);

Просмотреть файл

@ -48,6 +48,9 @@ with Files("tests/xpcshell/test_UpdateUtils*.js"):
with Files("AsyncPrefs.jsm"):
BUG_COMPONENT = ("Core", "Security: Process Sandboxing")
with Files("CharsetMenu.jsm"):
BUG_COMPONENT = ("Firefox", "Toolbars and Customization")
with Files("Color.jsm"):
BUG_COMPONENT = ("Toolkit", "Find Toolbar")
@ -157,6 +160,7 @@ EXTRA_JS_MODULES += [
"BrowserUtils.jsm",
"CanonicalJSON.jsm",
"CertUtils.jsm",
"CharsetMenu.jsm",
"Color.jsm",
"Console.jsm",
"ContentDOMReference.jsm",