зеркало из https://github.com/mozilla/gecko-dev.git
Backed out changeset 4891a17c55e2 (bug 1713627) for Browser-chrome failures in docshell/test/browser/browser_bug673087-1.js. CLOSED TREE
This commit is contained in:
Родитель
7292512649
Коммит
2118316ba4
|
@ -2025,6 +2025,7 @@ dependencies = [
|
|||
"rusqlite",
|
||||
"rust_minidump_writer_linux",
|
||||
"rustc_version",
|
||||
"shift_or_euc_c",
|
||||
"static_prefs",
|
||||
"storage",
|
||||
"unic-langid",
|
||||
|
@ -4631,6 +4632,26 @@ dependencies = [
|
|||
"opaque-debug",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shift_or_euc"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"
|
||||
dependencies = [
|
||||
"encoding_rs",
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shift_or_euc_c"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"
|
||||
dependencies = [
|
||||
"encoding_rs",
|
||||
"shift_or_euc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "0.1.1"
|
||||
|
|
|
@ -98,8 +98,9 @@ class BrowserTabChild extends JSWindowActorChild {
|
|||
} catch (e) {}
|
||||
break;
|
||||
|
||||
case "ForceEncodingDetection":
|
||||
docShell.forceEncodingDetection();
|
||||
case "UpdateCharacterSet":
|
||||
docShell.charset = message.data.value;
|
||||
docShell.gatherCharsetMenuTelemetry();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -547,6 +547,16 @@
|
|||
</vbox>
|
||||
</panelview>
|
||||
|
||||
<panelview id="PanelUI-characterEncodingView" flex="1">
|
||||
<vbox class="panel-subview-body">
|
||||
<vbox id="PanelUI-characterEncodingView-pinned"
|
||||
class="PanelUI-characterEncodingView-list"/>
|
||||
<toolbarseparator/>
|
||||
<vbox id="PanelUI-characterEncodingView-charsets"
|
||||
class="PanelUI-characterEncodingView-list"/>
|
||||
</vbox>
|
||||
</panelview>
|
||||
|
||||
<panelview id="PanelUI-panicView" flex="1"
|
||||
descriptionheightworkaround="true">
|
||||
<vbox class="panel-subview-body">
|
||||
|
@ -585,6 +595,11 @@
|
|||
class="subviewbutton subviewbutton-iconic"
|
||||
data-l10n-id="appmenu-taskmanager"
|
||||
oncommand="switchToTabHavingURI('about:performance', true)"/>
|
||||
<toolbarbutton id="appMenu-characterencoding-button"
|
||||
class="subviewbutton subviewbutton-nav"
|
||||
label="&charsetMenu2.label;"
|
||||
closemenu="none"
|
||||
oncommand="PanelUI.showSubView('PanelUI-characterEncodingView', this)"/>
|
||||
<toolbarbutton id="appMenu-workoffline-button"
|
||||
class="subviewbutton"
|
||||
data-l10n-id="more-menu-go-offline"
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
%brandDTD;
|
||||
<!ENTITY % browserDTD SYSTEM "chrome://browser/locale/browser.dtd" >
|
||||
%browserDTD;
|
||||
<!ENTITY % charsetDTD SYSTEM "chrome://global/locale/charsetMenu.dtd" >
|
||||
%charsetDTD;
|
||||
<!ENTITY % textcontextDTD SYSTEM "chrome://global/locale/textcontext.dtd" >
|
||||
%textcontextDTD;
|
||||
<!ENTITY % placesDTD SYSTEM "chrome://browser/locale/places/places.dtd">
|
||||
|
|
|
@ -143,7 +143,8 @@
|
|||
</menu>
|
||||
|
||||
<menu id="view-menu" data-l10n-id="menu-view">
|
||||
<menupopup id="menu_viewPopup">
|
||||
<menupopup id="menu_viewPopup"
|
||||
onpopupshowing="updateCharacterEncodingMenuState();">
|
||||
<menu id="viewToolbarsMenu" data-l10n-id="menu-view-toolbars-menu">
|
||||
<menupopup id="view-menu-popup" onpopupshowing="onViewToolbarsPopupShowing(event);">
|
||||
<menuseparator/>
|
||||
|
@ -202,7 +203,7 @@
|
|||
</menu>
|
||||
<menuitem id="repair-text-encoding"
|
||||
disabled="true"
|
||||
oncommand="BrowserForceEncodingDetection();"
|
||||
oncommand="BrowserSetForcedCharacterSet('_autodetect_all')"
|
||||
data-l10n-id="menu-view-repair-text-encoding"/>
|
||||
<menuseparator/>
|
||||
#ifdef XP_MACOSX
|
||||
|
|
|
@ -27,6 +27,7 @@ XPCOMUtils.defineLazyModuleGetters(this, {
|
|||
BrowserUtils: "resource://gre/modules/BrowserUtils.jsm",
|
||||
BrowserWindowTracker: "resource:///modules/BrowserWindowTracker.jsm",
|
||||
CFRPageActions: "resource://activity-stream/lib/CFRPageActions.jsm",
|
||||
CharsetMenu: "resource://gre/modules/CharsetMenu.jsm",
|
||||
Color: "resource://gre/modules/Color.jsm",
|
||||
ContextualIdentityService:
|
||||
"resource://gre/modules/ContextualIdentityService.jsm",
|
||||
|
@ -4883,6 +4884,24 @@ function updateUserContextUIIndicator() {
|
|||
hbox.hidden = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes the Character Encoding menu enabled or disabled as appropriate.
|
||||
* To be called when the View menu or the app menu is opened.
|
||||
*/
|
||||
function updateCharacterEncodingMenuState() {
|
||||
let charsetMenu = document.getElementById("charsetMenu");
|
||||
// gBrowser is null on Mac when the menubar shows in the context of
|
||||
// non-browser windows. The above elements may be null depending on
|
||||
// what parts of the menubar are present. E.g. no app menu on Mac.
|
||||
if (gBrowser && gBrowser.selectedBrowser.mayEnableCharacterEncodingMenu) {
|
||||
if (charsetMenu) {
|
||||
charsetMenu.removeAttribute("disabled");
|
||||
}
|
||||
} else if (charsetMenu) {
|
||||
charsetMenu.setAttribute("disabled", "true");
|
||||
}
|
||||
}
|
||||
|
||||
var XULBrowserWindow = {
|
||||
// Stored Status, Link and Loading values
|
||||
status: "",
|
||||
|
@ -7059,11 +7078,37 @@ function handleDroppedLink(
|
|||
}
|
||||
}
|
||||
|
||||
function BrowserForceEncodingDetection() {
|
||||
gBrowser.selectedBrowser.forceEncodingDetection();
|
||||
function BrowserSetForcedCharacterSet(aCharset) {
|
||||
if (aCharset) {
|
||||
if (aCharset == "Japanese") {
|
||||
aCharset = "Shift_JIS";
|
||||
}
|
||||
gBrowser.selectedBrowser.characterSet = aCharset;
|
||||
// Save the forced character-set
|
||||
PlacesUIUtils.setCharsetForPage(
|
||||
gBrowser.currentURI,
|
||||
aCharset,
|
||||
window
|
||||
).catch(Cu.reportError);
|
||||
}
|
||||
BrowserCharsetReload();
|
||||
}
|
||||
|
||||
function BrowserCharsetReload() {
|
||||
BrowserReloadWithFlags(Ci.nsIWebNavigation.LOAD_FLAGS_CHARSET_CHANGE);
|
||||
}
|
||||
|
||||
function UpdateCurrentCharset(target) {
|
||||
let selectedCharset = CharsetMenu.foldCharset(
|
||||
gBrowser.selectedBrowser.characterSet,
|
||||
gBrowser.selectedBrowser.charsetAutodetected
|
||||
);
|
||||
for (let menuItem of target.getElementsByTagName("menuitem")) {
|
||||
let isSelected = menuItem.getAttribute("charset") === selectedCharset;
|
||||
menuItem.setAttribute("checked", isSelected);
|
||||
}
|
||||
}
|
||||
|
||||
var ToolbarContextMenu = {
|
||||
updateDownloadsAutoHide(popup) {
|
||||
let checkbox = document.getElementById(
|
||||
|
|
|
@ -439,7 +439,7 @@ const CustomizableWidgets = [
|
|||
id: "characterencoding-button",
|
||||
l10nId: "repair-text-encoding-button",
|
||||
onCommand(aEvent) {
|
||||
aEvent.view.BrowserForceEncodingDetection();
|
||||
aEvent.view.BrowserSetForcedCharacterSet("_autodetect_all");
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
|
@ -370,6 +370,7 @@ nsDocShell::nsDocShell(BrowsingContext* aBrowsingContext,
|
|||
: nsDocLoader(true),
|
||||
mContentWindowID(aContentWindowID),
|
||||
mBrowsingContext(aBrowsingContext),
|
||||
mForcedCharset(nullptr),
|
||||
mParentCharset(nullptr),
|
||||
mTreeOwner(nullptr),
|
||||
mScrollbarPref(ScrollbarPreference::Auto),
|
||||
|
@ -1526,7 +1527,7 @@ nsDocShell::GetCharset(nsACString& aCharset) {
|
|||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
nsDocShell::ForceEncodingDetection() {
|
||||
nsDocShell::GatherCharsetMenuTelemetry() {
|
||||
nsCOMPtr<nsIContentViewer> viewer;
|
||||
GetContentViewer(getter_AddRefs(viewer));
|
||||
if (!viewer) {
|
||||
|
@ -1538,11 +1539,15 @@ nsDocShell::ForceEncodingDetection() {
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
mForcedAutodetection = true;
|
||||
|
||||
LOGCHARSETMENU(("ENCODING_OVERRIDE_USED_AUTOMATIC"));
|
||||
Telemetry::ScalarSet(Telemetry::ScalarID::ENCODING_OVERRIDE_USED_AUTOMATIC,
|
||||
true);
|
||||
if (mForcedAutodetection) {
|
||||
LOGCHARSETMENU(("ENCODING_OVERRIDE_USED_AUTOMATIC"));
|
||||
Telemetry::ScalarSet(Telemetry::ScalarID::ENCODING_OVERRIDE_USED_AUTOMATIC,
|
||||
true);
|
||||
} else {
|
||||
LOGCHARSETMENU(("ENCODING_OVERRIDE_USED_MANUAL"));
|
||||
Telemetry::ScalarSet(Telemetry::ScalarID::ENCODING_OVERRIDE_USED_MANUAL,
|
||||
true);
|
||||
}
|
||||
|
||||
nsIURI* url = doc->GetOriginalURI();
|
||||
bool isFileURL = url && SchemeIsFile(url);
|
||||
|
@ -1556,6 +1561,28 @@ nsDocShell::ForceEncodingDetection() {
|
|||
Telemetry::AccumulateCategorical(
|
||||
Telemetry::LABELS_ENCODING_OVERRIDE_SITUATION_2::AutoOverridden);
|
||||
break;
|
||||
case kCharsetFromUserForced:
|
||||
case kCharsetFromUserForcedJapaneseAutoDetection:
|
||||
LOGCHARSETMENU(("ManuallyOverridden"));
|
||||
Telemetry::AccumulateCategorical(
|
||||
Telemetry::LABELS_ENCODING_OVERRIDE_SITUATION_2::ManuallyOverridden);
|
||||
break;
|
||||
case kCharsetFromTopLevelDomain:
|
||||
if (encoding == WINDOWS_1252_ENCODING) {
|
||||
LOGCHARSETMENU(("UnlabeledInLk"));
|
||||
Telemetry::AccumulateCategorical(
|
||||
Telemetry::LABELS_ENCODING_OVERRIDE_SITUATION_2::UnlabeledInLk);
|
||||
} else {
|
||||
LOGCHARSETMENU(("UnlabeledJp"));
|
||||
Telemetry::AccumulateCategorical(
|
||||
Telemetry::LABELS_ENCODING_OVERRIDE_SITUATION_2::UnlabeledJp);
|
||||
}
|
||||
break;
|
||||
case kCharsetFromFinalJapaneseAutoDetection:
|
||||
LOGCHARSETMENU(("UnlabeledJp"));
|
||||
Telemetry::AccumulateCategorical(
|
||||
Telemetry::LABELS_ENCODING_OVERRIDE_SITUATION_2::UnlabeledJp);
|
||||
break;
|
||||
case kCharsetFromInitialAutoDetectionASCII:
|
||||
// Deliberately no final version
|
||||
LOGCHARSETMENU(("UnlabeledAscii"));
|
||||
|
@ -1630,6 +1657,31 @@ nsDocShell::ForceEncodingDetection() {
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
nsDocShell::SetCharset(const nsACString& aCharset) {
|
||||
mForcedAutodetection = false;
|
||||
if (aCharset.IsEmpty()) {
|
||||
mForcedCharset = nullptr;
|
||||
return NS_OK;
|
||||
}
|
||||
if (aCharset.EqualsLiteral("_autodetect_all")) {
|
||||
mForcedCharset = WINDOWS_1252_ENCODING;
|
||||
mForcedAutodetection = true;
|
||||
return NS_OK;
|
||||
}
|
||||
const Encoding* encoding = Encoding::ForLabel(aCharset);
|
||||
if (!encoding) {
|
||||
// Reject unknown labels
|
||||
return NS_ERROR_INVALID_ARG;
|
||||
}
|
||||
if (!encoding->IsAsciiCompatible() && encoding != ISO_2022_JP_ENCODING) {
|
||||
// Reject XSS hazards
|
||||
return NS_ERROR_INVALID_ARG;
|
||||
}
|
||||
mForcedCharset = encoding;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
void nsDocShell::SetParentCharset(const Encoding*& aCharset,
|
||||
int32_t aCharsetSource,
|
||||
nsIPrincipal* aPrincipal) {
|
||||
|
@ -1990,6 +2042,30 @@ nsDocShell::GetMayEnableCharacterEncodingMenu(
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
nsDocShell::GetCharsetAutodetected(bool* aCharsetAutodetected) {
|
||||
*aCharsetAutodetected = false;
|
||||
if (!mContentViewer) {
|
||||
return NS_OK;
|
||||
}
|
||||
Document* doc = mContentViewer->GetDocument();
|
||||
if (!doc) {
|
||||
return NS_OK;
|
||||
}
|
||||
int32_t source = doc->GetDocumentCharacterSetSource();
|
||||
|
||||
if ((source >= kCharsetFromInitialAutoDetectionASCII &&
|
||||
source <= kCharsetFromFinalAutoDetectionFile) ||
|
||||
source == kCharsetFromUserForcedJapaneseAutoDetection ||
|
||||
source == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
source == kCharsetFromInitialUserForcedAutoDetection ||
|
||||
source == kCharsetFromFinalUserForcedAutoDetection) {
|
||||
*aCharsetAutodetected = true;
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
nsDocShell::GetAllDocShellsInSubtree(int32_t aItemType,
|
||||
DocShellEnumeratorDirection aDirection,
|
||||
|
|
|
@ -327,9 +327,9 @@ class nsDocShell final : public nsDocLoader,
|
|||
void SetInFrameSwap(bool aInSwap) { mInFrameSwap = aInSwap; }
|
||||
bool InFrameSwap();
|
||||
|
||||
bool GetForcedAutodetection() { return mForcedAutodetection; }
|
||||
const mozilla::Encoding* GetForcedCharset() { return mForcedCharset; }
|
||||
|
||||
void ResetForcedAutodetection() { mForcedAutodetection = false; }
|
||||
bool GetForcedAutodetection() { return mForcedAutodetection; }
|
||||
|
||||
mozilla::HTMLEditor* GetHTMLEditorInternal();
|
||||
nsresult SetHTMLEditorInternal(mozilla::HTMLEditor* aHTMLEditor);
|
||||
|
@ -1189,6 +1189,7 @@ class nsDocShell final : public nsDocLoader,
|
|||
|
||||
mozilla::UniquePtr<mozilla::gfx::Matrix5x4> mColorMatrix;
|
||||
|
||||
const mozilla::Encoding* mForcedCharset;
|
||||
const mozilla::Encoding* mParentCharset;
|
||||
|
||||
// WEAK REFERENCES BELOW HERE.
|
||||
|
|
|
@ -494,10 +494,17 @@ interface nsIDocShell : nsIDocShellTreeItem
|
|||
/**
|
||||
* Upon getting, returns the canonical encoding label of the document
|
||||
* currently loaded into this docshell.
|
||||
*
|
||||
* Upon setting, sets the forced encoding for compatibility with legacy callers.
|
||||
*/
|
||||
readonly attribute ACString charset;
|
||||
attribute ACString charset;
|
||||
|
||||
void forceEncodingDetection();
|
||||
/**
|
||||
* Called when the user chose an encoding override from the character
|
||||
* encoding menu. Separate from the setter for the charset property to avoid
|
||||
* extensions adding noise to the data.
|
||||
*/
|
||||
void gatherCharsetMenuTelemetry();
|
||||
|
||||
/**
|
||||
* In a child docshell, this is the charset of the parent docshell
|
||||
|
@ -613,6 +620,11 @@ interface nsIDocShell : nsIDocShellTreeItem
|
|||
*/
|
||||
[infallible] readonly attribute boolean mayEnableCharacterEncodingMenu;
|
||||
|
||||
/**
|
||||
* Indicates that the character encoding was autodetected.
|
||||
*/
|
||||
[infallible] readonly attribute boolean charsetAutodetected;
|
||||
|
||||
attribute nsIEditor editor;
|
||||
readonly attribute boolean editable; /* this docShell is editable */
|
||||
readonly attribute boolean hasEditingSession; /* this docShell has an editing session */
|
||||
|
|
|
@ -21,6 +21,9 @@ support-files =
|
|||
file_bug234628-6-child.html
|
||||
file_bug234628-6-child.html^headers^
|
||||
file_bug234628-6.html
|
||||
file_bug234628-7-child.html
|
||||
file_bug234628-7-child.html^headers^
|
||||
file_bug234628-7.html
|
||||
file_bug234628-8-child.html
|
||||
file_bug234628-8.html
|
||||
file_bug234628-9-child.html
|
||||
|
@ -40,8 +43,14 @@ support-files =
|
|||
file_bug1328501.html
|
||||
file_bug1328501_frame.html
|
||||
file_bug1328501_framescript.js
|
||||
file_bug1543077-1-child.html
|
||||
file_bug1543077-1.html
|
||||
file_bug1543077-2-child.html
|
||||
file_bug1543077-2.html
|
||||
file_bug1543077-3-child.html
|
||||
file_bug1543077-3.html
|
||||
file_bug1543077-4-child.html
|
||||
file_bug1543077-4.html
|
||||
file_multiple_pushState.html
|
||||
file_onbeforeunload_0.html
|
||||
file_onbeforeunload_1.html
|
||||
|
@ -76,7 +85,10 @@ skip-if =
|
|||
os == "linux" && bits == 64 && !debug # Bug 1607713
|
||||
fission && os == "mac" && debug # Bug 1713903 - new Fission platform triage
|
||||
[browser_backforward_userinteraction_about.js]
|
||||
[browser_bug1543077-1.js]
|
||||
[browser_bug1543077-2.js]
|
||||
[browser_bug1543077-3.js]
|
||||
[browser_bug1543077-4.js]
|
||||
[browser_bug1594938.js]
|
||||
[browser_bug1206879.js]
|
||||
[browser_bug1309900_crossProcessHistoryNavigation.js]
|
||||
|
@ -106,6 +118,7 @@ skip-if = !fission || !crashreporter # On a crash we only keep history when fiss
|
|||
[browser_bug234628-4.js]
|
||||
[browser_bug234628-5.js]
|
||||
[browser_bug234628-6.js]
|
||||
[browser_bug234628-7.js]
|
||||
[browser_bug234628-8.js]
|
||||
[browser_bug234628-9.js]
|
||||
[browser_bug349769.js]
|
||||
|
|
|
@ -29,7 +29,7 @@ function afterOpen() {
|
|||
content.document.getElementById("testinput").value = TEXT.enteredText2;
|
||||
}).then(() => {
|
||||
/* Force the page encoding to Shift_JIS */
|
||||
BrowserForceEncodingDetection();
|
||||
BrowserSetForcedCharacterSet("Shift_JIS");
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(
|
||||
rootDir + "file_bug1543077-1.html",
|
||||
afterOpen,
|
||||
"Japanese",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u0434"),
|
||||
131,
|
||||
"Parent doc should be IBM866 initially"
|
||||
);
|
||||
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u0434"),
|
||||
87,
|
||||
"Child doc should be IBM866 initially"
|
||||
);
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u3042"),
|
||||
131,
|
||||
"Parent doc should decode as EUC-JP subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u3042"),
|
||||
87,
|
||||
"Child doc should decode as EUC-JP subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"EUC-JP",
|
||||
"Parent doc should report EUC-JP subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
"EUC-JP",
|
||||
"Child doc should report EUC-JP subsequently"
|
||||
);
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(
|
||||
rootDir + "file_bug1543077-2.html",
|
||||
afterOpen,
|
||||
"Japanese",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u0412"),
|
||||
134,
|
||||
"Parent doc should be IBM866 initially"
|
||||
);
|
||||
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u0412"),
|
||||
90,
|
||||
"Child doc should be IBM866 initially"
|
||||
);
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u3042"),
|
||||
134,
|
||||
"Parent doc should decode as Shift_JIS subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u3042"),
|
||||
90,
|
||||
"Child doc should decode as Shift_JIS subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"Shift_JIS",
|
||||
"Parent doc should report Shift_JIS subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
"Shift_JIS",
|
||||
"Child doc should report Shift_JIS subsequently"
|
||||
);
|
||||
}
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug1543077-3.html",
|
||||
afterOpen,
|
||||
"Japanese",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(
|
||||
rootDir + "file_bug1543077-4.html",
|
||||
afterOpen,
|
||||
"Japanese",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u0434"),
|
||||
131,
|
||||
"Parent doc should be IBM866 initially"
|
||||
);
|
||||
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u0412"),
|
||||
90,
|
||||
"Child doc should be IBM866 initially"
|
||||
);
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u3042"),
|
||||
131,
|
||||
"Parent doc should decode as EUC-JP subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u3042"),
|
||||
90,
|
||||
"Child doc should decode as Shift_JIS subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"EUC-JP",
|
||||
"Parent doc should report EUC-JP subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
"Shift_JIS",
|
||||
"Child doc should report Shift_JIS subsequently"
|
||||
);
|
||||
}
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug1648464-1.html",
|
||||
afterOpen,
|
||||
"_autodetect_all",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug1688368-1.sjs",
|
||||
afterOpen,
|
||||
"UTF-8",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug234628-1.html",
|
||||
afterOpen,
|
||||
"windows-1251",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
@ -23,25 +24,24 @@ function afterOpen() {
|
|||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.document.documentElement.textContent.indexOf("\u0402"),
|
||||
129,
|
||||
"Parent doc should be windows-1252 subsequently"
|
||||
"Parent doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u0402"),
|
||||
85,
|
||||
"Child doc should be windows-1252 subsequently"
|
||||
"Child doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"windows-1252",
|
||||
"Parent doc should report windows-1252 subsequently"
|
||||
"windows-1251",
|
||||
"Parent doc should report windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
"windows-1252",
|
||||
"Child doc should report windows-1252 subsequently"
|
||||
"windows-1251",
|
||||
"Child doc should report windows-1251 subsequently"
|
||||
);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug234628-10.html",
|
||||
afterOpen,
|
||||
"windows-1251",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
@ -23,9 +24,9 @@ function afterOpen() {
|
|||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.document.documentElement.textContent.indexOf("\u0402"),
|
||||
151,
|
||||
"Parent doc should be windows-1252 initially"
|
||||
"Parent doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
|
||||
|
@ -35,8 +36,8 @@ function afterChangeCharset() {
|
|||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"windows-1252",
|
||||
"Parent doc should report windows-1252 subsequently"
|
||||
"windows-1251",
|
||||
"Parent doc should report windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
|
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug234628-11.html",
|
||||
afterOpen,
|
||||
"windows-1251",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
@ -23,9 +24,9 @@ function afterOpen() {
|
|||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.document.documentElement.textContent.indexOf("\u0402"),
|
||||
193,
|
||||
"Parent doc should be windows-1252 subsequently"
|
||||
"Parent doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
|
||||
|
@ -35,8 +36,8 @@ function afterChangeCharset() {
|
|||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"windows-1252",
|
||||
"Parent doc should report windows-1252 subsequently"
|
||||
"windows-1251",
|
||||
"Parent doc should report windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
|
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug234628-2.html",
|
||||
afterOpen,
|
||||
"windows-1251",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
@ -25,25 +26,26 @@ function afterOpen() {
|
|||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.document.documentElement.textContent.indexOf("\u0402"),
|
||||
129,
|
||||
"Parent doc should be windows-1252 subsequently"
|
||||
"Parent doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.frames[0].document.documentElement.textContent.indexOf(
|
||||
"\u0432\u201A\u00AC"
|
||||
),
|
||||
78,
|
||||
"Child doc should be UTF-8 subsequently"
|
||||
"Child doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"windows-1252",
|
||||
"Parent doc should report windows-1252 subsequently"
|
||||
"windows-1251",
|
||||
"Parent doc should report windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
"UTF-8",
|
||||
"Child doc should report UTF-8 subsequently"
|
||||
"windows-1251",
|
||||
"Child doc should report windows-1251 subsequently"
|
||||
);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug234628-3.html",
|
||||
afterOpen,
|
||||
"windows-1251",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
@ -23,25 +24,26 @@ function afterOpen() {
|
|||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.document.documentElement.textContent.indexOf("\u0402"),
|
||||
118,
|
||||
"Parent doc should be windows-1252 subsequently"
|
||||
"Parent doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.frames[0].document.documentElement.textContent.indexOf(
|
||||
"\u0432\u201A\u00AC"
|
||||
),
|
||||
73,
|
||||
"Child doc should be utf-8 subsequently"
|
||||
"Child doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"windows-1252",
|
||||
"Parent doc should report windows-1252 subsequently"
|
||||
"windows-1251",
|
||||
"Parent doc should report windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
"UTF-8",
|
||||
"Child doc should report UTF-8 subsequently"
|
||||
"windows-1251",
|
||||
"Child doc should report windows-1251 subsequently"
|
||||
);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug234628-4.html",
|
||||
afterOpen,
|
||||
"windows-1251",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
@ -23,9 +24,9 @@ function afterOpen() {
|
|||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.document.documentElement.textContent.indexOf("\u0402"),
|
||||
132,
|
||||
"Parent doc should decode as windows-1252 subsequently"
|
||||
"Parent doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
|
||||
|
@ -35,8 +36,8 @@ function afterChangeCharset() {
|
|||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"windows-1252",
|
||||
"Parent doc should report windows-1252 subsequently"
|
||||
"windows-1251",
|
||||
"Parent doc should report windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
|
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug234628-5.html",
|
||||
afterOpen,
|
||||
"windows-1251",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
@ -23,9 +24,9 @@ function afterOpen() {
|
|||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.document.documentElement.textContent.indexOf("\u0402"),
|
||||
146,
|
||||
"Parent doc should be windows-1252 subsequently"
|
||||
"Parent doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
|
||||
|
@ -35,8 +36,8 @@ function afterChangeCharset() {
|
|||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"windows-1252",
|
||||
"Parent doc should report windows-1252 subsequently"
|
||||
"windows-1251",
|
||||
"Parent doc should report windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
|
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug234628-6.html",
|
||||
afterOpen,
|
||||
"windows-1251",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
@ -23,21 +24,20 @@ function afterOpen() {
|
|||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u20AC"),
|
||||
content.document.documentElement.textContent.indexOf("\u0402"),
|
||||
190,
|
||||
"Parent doc should be windows-1252 subsequently"
|
||||
"Parent doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
|
||||
109,
|
||||
"Child doc should be utf-16 subsequently"
|
||||
"Child doc should decode as utf-16 subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"windows-1252",
|
||||
"Parent doc should report windows-1252 subsequently"
|
||||
"windows-1251",
|
||||
"Parent doc should report windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(
|
||||
rootDir + "file_bug234628-7.html",
|
||||
afterOpen,
|
||||
"windows-1251",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u20AC"),
|
||||
188,
|
||||
"Parent doc should be windows-1252 initially"
|
||||
);
|
||||
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf("\u20AC"),
|
||||
107,
|
||||
"Child doc should be utf-8 initially"
|
||||
);
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(
|
||||
content.document.documentElement.textContent.indexOf("\u0402"),
|
||||
188,
|
||||
"Parent doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.documentElement.textContent.indexOf(
|
||||
"\u0432\u201A\u00AC"
|
||||
),
|
||||
107,
|
||||
"Child doc should decode as windows-1251 subsequently"
|
||||
);
|
||||
|
||||
is(
|
||||
content.document.characterSet,
|
||||
"windows-1251",
|
||||
"Parent doc should report windows-1251 subsequently"
|
||||
);
|
||||
is(
|
||||
content.frames[0].document.characterSet,
|
||||
"windows-1251",
|
||||
"Child doc should report windows-1251 subsequently"
|
||||
);
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetCheck(rootDir + "file_bug234628-8.html", afterOpen);
|
||||
runCharsetTest(rootDir + "file_bug234628-8.html", afterOpen);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetCheck(rootDir + "file_bug234628-9.html", afterOpen);
|
||||
runCharsetTest(rootDir + "file_bug234628-9.html", afterOpen);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
|
|
|
@ -3,6 +3,7 @@ function test() {
|
|||
runCharsetTest(
|
||||
rootDir + "file_bug673087-2.html",
|
||||
afterOpen,
|
||||
"windows-1252",
|
||||
afterChangeCharset
|
||||
);
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ function afterOpen() {
|
|||
|
||||
/* Test that the content on load is the expected wrong decoding */
|
||||
testContent(wrongText).then(() => {
|
||||
BrowserForceEncodingDetection();
|
||||
BrowserSetForcedCharacterSet("Shift_JIS");
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
|
||||
|
||||
<iframe src="file_bug1543077-1-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as Shift_JIS: ‚ </p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as Shift_JIS: ‚ </p>
|
||||
|
||||
<iframe src="file_bug1543077-2-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as Shift_JIS: ‚ </p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
|
||||
|
||||
<iframe src="file_bug1543077-4-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>meta declaration in parent and BOMless UTF-8 with HTTP charset in child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Euro sign if decoded as UTF-8: €</p>
|
||||
<p>a with diaeresis if decoded as UTF-8: ä</p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1 @@
|
|||
Content-Type: text/html; charset=utf-8
|
|
@ -0,0 +1,18 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="windows-1252">
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>meta declaration in parent and BOMless UTF-8 with HTTP charset in child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>meta declaration in parent and BOMless UTF-8 with HTTP charset in child</h1>
|
||||
|
||||
<p>Euro sign if decoded as Windows-1252: €</p>
|
||||
<p>a with diaeresis if decoded as Windows-1252: ä</p>
|
||||
|
||||
<iframe src="file_bug234628-7-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -63,22 +63,34 @@ function timelineTestOpenUrl(url) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Helper function for encoding override tests, loads URL, runs check1,
|
||||
* forces encoding detection, runs check2.
|
||||
* Helper function for charset tests. It loads |url| in a new tab,
|
||||
* runs |check1| in a ContentTask when the page is ready, switches the
|
||||
* charset to |charset|, and then runs |check2| in a ContentTask when
|
||||
* the page has finished reloading.
|
||||
*
|
||||
* |charset| and |check2| can be omitted, in which case the test
|
||||
* finishes when |check1| completes.
|
||||
*/
|
||||
function runCharsetTest(url, check1, check2) {
|
||||
function runCharsetTest(url, check1, charset, check2) {
|
||||
waitForExplicitFinish();
|
||||
|
||||
BrowserTestUtils.openNewForegroundTab(gBrowser, url, true).then(afterOpen);
|
||||
|
||||
function afterOpen() {
|
||||
BrowserTestUtils.browserLoaded(gBrowser.selectedBrowser).then(
|
||||
afterChangeCharset
|
||||
);
|
||||
if (charset) {
|
||||
BrowserTestUtils.browserLoaded(gBrowser.selectedBrowser).then(
|
||||
afterChangeCharset
|
||||
);
|
||||
|
||||
SpecialPowers.spawn(gBrowser.selectedBrowser, [], check1).then(() => {
|
||||
BrowserForceEncodingDetection();
|
||||
});
|
||||
SpecialPowers.spawn(gBrowser.selectedBrowser, [], check1).then(() => {
|
||||
BrowserSetForcedCharacterSet(charset);
|
||||
});
|
||||
} else {
|
||||
SpecialPowers.spawn(gBrowser.selectedBrowser, [], check1).then(() => {
|
||||
gBrowser.removeCurrentTab();
|
||||
finish();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
|
@ -89,23 +101,6 @@ function runCharsetTest(url, check1, check2) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function for charset tests. It loads |url| in a new tab,
|
||||
* runs |check|.
|
||||
*/
|
||||
function runCharsetCheck(url, check) {
|
||||
waitForExplicitFinish();
|
||||
|
||||
BrowserTestUtils.openNewForegroundTab(gBrowser, url, true).then(afterOpen);
|
||||
|
||||
function afterOpen() {
|
||||
SpecialPowers.spawn(gBrowser.selectedBrowser, [], check).then(() => {
|
||||
gBrowser.removeCurrentTab();
|
||||
finish();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async function pushState(url, frameId) {
|
||||
info(
|
||||
`Doing a pushState, expecting to load ${url} ${
|
||||
|
|
|
@ -362,11 +362,8 @@ nsDOMWindowUtils::GetDocCharsetIsForced(bool* aIsForced) {
|
|||
*aIsForced = false;
|
||||
|
||||
Document* doc = GetDocument();
|
||||
if (doc) {
|
||||
auto source = doc->GetDocumentCharacterSetSource();
|
||||
*aIsForced = source == kCharsetFromInitialUserForcedAutoDetection ||
|
||||
source == kCharsetFromFinalUserForcedAutoDetection;
|
||||
}
|
||||
*aIsForced =
|
||||
doc && doc->GetDocumentCharacterSetSource() >= kCharsetFromUserForced;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -206,7 +206,7 @@ void nsHTMLDocument::TryUserForcedCharset(nsIContentViewer* aCv,
|
|||
nsIDocShell* aDocShell,
|
||||
int32_t& aCharsetSource,
|
||||
NotNull<const Encoding*>& aEncoding) {
|
||||
if (aCharsetSource >= kCharsetFromXmlDeclarationUtf16) {
|
||||
if (kCharsetFromUserForced <= aCharsetSource) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -215,11 +215,20 @@ void nsHTMLDocument::TryUserForcedCharset(nsIContentViewer* aCv,
|
|||
return;
|
||||
}
|
||||
|
||||
if (aDocShell && nsDocShell::Cast(aDocShell)->GetForcedAutodetection()) {
|
||||
if (aDocShell) {
|
||||
// This is the Character Encoding menu code path in Firefox
|
||||
aEncoding = WINDOWS_1252_ENCODING;
|
||||
aCharsetSource = kCharsetFromPendingUserForcedAutoDetection;
|
||||
nsDocShell::Cast(aDocShell)->ResetForcedAutodetection();
|
||||
auto encoding = nsDocShell::Cast(aDocShell)->GetForcedCharset();
|
||||
|
||||
if (encoding) {
|
||||
if (!IsAsciiCompatible(encoding)) {
|
||||
return;
|
||||
}
|
||||
aEncoding = WrapNotNull(encoding);
|
||||
aCharsetSource = nsDocShell::Cast(aDocShell)->GetForcedAutodetection()
|
||||
? kCharsetFromPendingUserForcedAutoDetection
|
||||
: kCharsetFromUserForced;
|
||||
aDocShell->SetCharset(""_ns);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -229,7 +238,7 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
|
|||
if (!aDocShell) {
|
||||
return;
|
||||
}
|
||||
if (aCharsetSource >= kCharsetFromXmlDeclarationUtf16) {
|
||||
if (aCharsetSource >= kCharsetFromUserForced) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -241,7 +250,9 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
|
|||
if (!parentCharset) {
|
||||
return;
|
||||
}
|
||||
if (kCharsetFromPendingUserForcedAutoDetection == parentSource ||
|
||||
if (kCharsetFromUserForced == parentSource ||
|
||||
kCharsetFromUserForcedJapaneseAutoDetection == parentSource ||
|
||||
kCharsetFromPendingUserForcedAutoDetection == parentSource ||
|
||||
kCharsetFromInitialUserForcedAutoDetection == parentSource ||
|
||||
kCharsetFromFinalUserForcedAutoDetection == parentSource) {
|
||||
if (WillIgnoreCharsetOverride() ||
|
||||
|
@ -250,7 +261,11 @@ void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell,
|
|||
return;
|
||||
}
|
||||
aEncoding = WrapNotNull(parentCharset);
|
||||
aCharsetSource = kCharsetFromPendingUserForcedAutoDetection;
|
||||
aCharsetSource =
|
||||
(kCharsetFromUserForced == parentSource ||
|
||||
kCharsetFromUserForcedJapaneseAutoDetection == parentSource)
|
||||
? kCharsetFromUserForced
|
||||
: kCharsetFromPendingUserForcedAutoDetection;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -678,9 +693,11 @@ bool nsHTMLDocument::WillIgnoreCharsetOverride() {
|
|||
switch (mCharacterSetSource) {
|
||||
case kCharsetUninitialized:
|
||||
case kCharsetFromFallback:
|
||||
case kCharsetFromTopLevelDomain:
|
||||
case kCharsetFromDocTypeDefault:
|
||||
case kCharsetFromInitialAutoDetectionWouldHaveBeenUTF8:
|
||||
case kCharsetFromInitialAutoDetectionWouldNotHaveBeenUTF8DependedOnTLD:
|
||||
case kCharsetFromFinalJapaneseAutoDetection:
|
||||
case kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8:
|
||||
case kCharsetFromFinalAutoDetectionWouldNotHaveBeenUTF8DependedOnTLD:
|
||||
case kCharsetFromParentFrame:
|
||||
|
@ -688,6 +705,8 @@ bool nsHTMLDocument::WillIgnoreCharsetOverride() {
|
|||
case kCharsetFromMetaPrescan:
|
||||
case kCharsetFromMetaTag:
|
||||
case kCharsetFromChannel:
|
||||
case kCharsetFromUserForced:
|
||||
case kCharsetFromUserForcedJapaneseAutoDetection:
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -176,6 +176,9 @@ class nsHTMLDocument : public mozilla::dom::Document {
|
|||
void TryUserForcedCharset(nsIContentViewer* aCv, nsIDocShell* aDocShell,
|
||||
int32_t& aCharsetSource,
|
||||
NotNull<const Encoding*>& aEncoding);
|
||||
static void TryCacheCharset(nsICachingChannel* aCachingChannel,
|
||||
int32_t& aCharsetSource,
|
||||
NotNull<const Encoding*>& aEncoding);
|
||||
void TryParentCharset(nsIDocShell* aDocShell, int32_t& charsetSource,
|
||||
NotNull<const Encoding*>& aEncoding);
|
||||
|
||||
|
|
|
@ -67,6 +67,12 @@ interface nsIBrowser : nsISupports
|
|||
*/
|
||||
attribute boolean mayEnableCharacterEncodingMenu;
|
||||
|
||||
/**
|
||||
* Whether or not the character encoding was detected by analyzing
|
||||
* content (as opposed to reading a protocol label).
|
||||
*/
|
||||
attribute boolean charsetAutodetected;
|
||||
|
||||
/**
|
||||
* Called by Gecko to update the browser when its state changes.
|
||||
*
|
||||
|
@ -96,6 +102,8 @@ interface nsIBrowser : nsISupports
|
|||
* @param aCharset the character set of the document
|
||||
* @param aMayEnableCharacterEncodingMenu whether or not the content encoding
|
||||
* menu may be enabled
|
||||
* @param aCharsetAutodetected whether or not the given character set was
|
||||
* autodetected
|
||||
* @param aDocumentURI the URI of the new document
|
||||
* @param aTitle the title of the new document
|
||||
* @param aContentPrincipal the security principal of the new document
|
||||
|
@ -112,6 +120,7 @@ interface nsIBrowser : nsISupports
|
|||
void updateForLocationChange(in nsIURI aLocation,
|
||||
in AString aCharset,
|
||||
in boolean aMayEnableCharacterEncodingMenu,
|
||||
in boolean aCharsetAutodetected,
|
||||
in nsIURI aDocumentURI,
|
||||
in AString aTitle,
|
||||
in nsIPrincipal aContentPrincipal,
|
||||
|
|
|
@ -3602,6 +3602,7 @@ NS_IMETHODIMP BrowserChild::OnStateChange(nsIWebProgress* aWebProgress,
|
|||
stateChangeData->isNavigating() = docShell->GetIsNavigating();
|
||||
stateChangeData->mayEnableCharacterEncodingMenu() =
|
||||
docShell->GetMayEnableCharacterEncodingMenu();
|
||||
stateChangeData->charsetAutodetected() = docShell->GetCharsetAutodetected();
|
||||
|
||||
RefPtr<Document> document = browsingContext->GetExtantDocument();
|
||||
if (document && aStateFlags & nsIWebProgressListener::STATE_STOP) {
|
||||
|
@ -3700,6 +3701,8 @@ NS_IMETHODIMP BrowserChild::OnLocationChange(nsIWebProgress* aWebProgress,
|
|||
|
||||
locationChangeData->mayEnableCharacterEncodingMenu() =
|
||||
docShell->GetMayEnableCharacterEncodingMenu();
|
||||
locationChangeData->charsetAutodetected() =
|
||||
docShell->GetCharsetAutodetected();
|
||||
|
||||
locationChangeData->contentPrincipal() = document->NodePrincipal();
|
||||
locationChangeData->contentPartitionedPrincipal() =
|
||||
|
|
|
@ -2713,6 +2713,8 @@ mozilla::ipc::IPCResult BrowserParent::RecvOnStateChange(
|
|||
Unused << browser->SetIsNavigating(aStateChangeData->isNavigating());
|
||||
Unused << browser->SetMayEnableCharacterEncodingMenu(
|
||||
aStateChangeData->mayEnableCharacterEncodingMenu());
|
||||
Unused << browser->SetCharsetAutodetected(
|
||||
aStateChangeData->charsetAutodetected());
|
||||
Unused << browser->UpdateForStateChange(aStateChangeData->charset(),
|
||||
aStateChangeData->documentURI(),
|
||||
aStateChangeData->contentType());
|
||||
|
@ -2781,6 +2783,7 @@ mozilla::ipc::IPCResult BrowserParent::RecvOnLocationChange(
|
|||
Unused << browser->UpdateForLocationChange(
|
||||
aLocation, aLocationChangeData->charset(),
|
||||
aLocationChangeData->mayEnableCharacterEncodingMenu(),
|
||||
aLocationChangeData->charsetAutodetected(),
|
||||
aLocationChangeData->documentURI(), aLocationChangeData->title(),
|
||||
aLocationChangeData->contentPrincipal(),
|
||||
aLocationChangeData->contentPartitionedPrincipal(),
|
||||
|
|
|
@ -130,6 +130,7 @@ struct WebProgressStateChangeData
|
|||
{
|
||||
bool isNavigating;
|
||||
bool mayEnableCharacterEncodingMenu;
|
||||
bool charsetAutodetected;
|
||||
|
||||
// The following fields are only set when the aStateFlags param passed with
|
||||
// this struct is |nsIWebProgress.STATE_STOP|.
|
||||
|
@ -143,6 +144,7 @@ struct WebProgressLocationChangeData
|
|||
bool isNavigating;
|
||||
bool isSyntheticDocument;
|
||||
bool mayEnableCharacterEncodingMenu;
|
||||
bool charsetAutodetected;
|
||||
nsString contentType;
|
||||
nsString title;
|
||||
nsString charset;
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// Mostly copied and pasted from
|
||||
// third_party/rust/shift_or_euc/src/lib.rs , so
|
||||
// "top-level directory of this distribution" above refers to
|
||||
// third_party/rust/shift_or_euc/
|
||||
|
||||
#ifndef mozilla_JapaneseDetector_h
|
||||
#define mozilla_JapaneseDetector_h
|
||||
|
||||
#include "mozilla/Encoding.h"
|
||||
|
||||
namespace mozilla {
|
||||
class JapaneseDetector;
|
||||
}; // namespace mozilla
|
||||
|
||||
#define SHIFT_OR_EUC_DETECTOR mozilla::JapaneseDetector
|
||||
|
||||
#include "shift_or_euc.h"
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
/**
|
||||
* A Japanese legacy encoding detector for detecting between Shift_JIS,
|
||||
* EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the
|
||||
* encoding is one of those.
|
||||
*
|
||||
* # Principle of Operation
|
||||
*
|
||||
* The detector is based on two observations:
|
||||
*
|
||||
* 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
|
||||
* EUC-JP, so encountering such an escape sequence (before non-ASCII has been
|
||||
* encountered) can be taken as indication of ISO-2022-JP.
|
||||
* 2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
|
||||
* decoded as EUC-JP, or vice versa, the result is either an error or
|
||||
* half-width katakana, and it's very uncommon for Japanese HTML to have
|
||||
* half-width katakana character before a normal kana or common kanji
|
||||
* character. Therefore, if decoding as Shift_JIS results in error or
|
||||
*    half-width katakana, the detector decides that the content is EUC-JP, and
|
||||
* vice versa.
|
||||
*
|
||||
* # Failure Modes
|
||||
*
|
||||
* The detector gives the wrong answer if the text has a half-width katakana
|
||||
* character before normal kana or common kanji. Some uncommon kanji are
|
||||
* undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
|
||||
*
|
||||
* The half-width katakana issue is mainly relevant for old 8-bit JIS X
|
||||
* 0201-only text files that would decode correctly as Shift_JIS but that the
|
||||
* detector detects as EUC-JP.
|
||||
*
|
||||
* The undecidable kanji issue does not realistically show up when a full
|
||||
* document is fed to the detector, because, realistically, in a full
|
||||
* document, there is at least one kana or common kanji. It can occur,
|
||||
* though, if the detector is only run on a prefix of a document and the
|
||||
* prefix only contains the title of the document. It is possible for
|
||||
* a document title to consist entirely of undecidable kanji. (Indeed,
|
||||
* Japanese Wikipedia has articles with such titles.) If the detector is
|
||||
* undecided, a fallback to Shift_JIS should be used.
|
||||
*/
|
||||
class JapaneseDetector final {
|
||||
public:
|
||||
~JapaneseDetector() {}
|
||||
|
||||
static void operator delete(void* aDetector) {
|
||||
shift_or_euc_detector_free(reinterpret_cast<JapaneseDetector*>(aDetector));
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates the detector. If `aAllow2022` is `true` the possible
|
||||
* guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
* `aAllow2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
* and undecided.
|
||||
*/
|
||||
static inline UniquePtr<JapaneseDetector> Create(bool aAllow2022) {
|
||||
UniquePtr<JapaneseDetector> detector(shift_or_euc_detector_new(aAllow2022));
|
||||
return detector;
|
||||
}
|
||||
|
||||
/**
|
||||
* Feeds bytes to the detector. If `aLast` is `true` the end of the stream
|
||||
* is considered to occur immediately after the end of `aBuffer`.
|
||||
* Otherwise, the stream is expected to continue. `aBuffer` may be empty.
|
||||
*
|
||||
* If you're running the detector only on a prefix of a complete
|
||||
* document, _do not_ pass `aLast` as `true` after the prefix if the
|
||||
* stream as a whole still contains more content.
|
||||
*
|
||||
* Returns `SHIFT_JIS_ENCODING` if the detector guessed
|
||||
* Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
|
||||
* guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
|
||||
* detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
* `aAllow2022` when instantiating the detector). Returns `nullptr` if the
|
||||
* detector is undecided. If `nullptr` is returned even when passing `true`
|
||||
* as `aLast`, falling back to Shift_JIS is the best guess for Web
|
||||
* purposes.
|
||||
*
|
||||
* Do not call again after the method has returned non-`nullptr` or after
|
||||
* the method has been called with `true` as `aLast`. (Asserts if the
|
||||
* previous sentence isn't adhered to.)
|
||||
*/
|
||||
inline const mozilla::Encoding* Feed(Span<const uint8_t> aBuffer,
|
||||
bool aLast) {
|
||||
return shift_or_euc_detector_feed(this, aBuffer.Elements(),
|
||||
aBuffer.Length(), aLast);
|
||||
}
|
||||
|
||||
private:
|
||||
JapaneseDetector() = delete;
|
||||
JapaneseDetector(const JapaneseDetector&) = delete;
|
||||
JapaneseDetector& operator=(const JapaneseDetector&) = delete;
|
||||
};
|
||||
|
||||
}; // namespace mozilla
|
||||
|
||||
#endif // mozilla_JapaneseDetector_h
|
|
@ -27,12 +27,14 @@ DIRS += [
|
|||
EXPORTS.mozilla += [
|
||||
"Encoding.h",
|
||||
"EncodingDetector.h",
|
||||
"JapaneseDetector.h",
|
||||
]
|
||||
|
||||
EXPORTS += [
|
||||
"../third_party/rust/chardetng_c/include/chardetng.h",
|
||||
"../third_party/rust/encoding_c/include/encoding_rs.h",
|
||||
"../third_party/rust/encoding_c/include/encoding_rs_statics.h",
|
||||
"../third_party/rust/shift_or_euc_c/include/shift_or_euc.h",
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -177,6 +177,10 @@ exclude-multi-locale = [
|
|||
reference = "toolkit/locales/en-US/toolkit/about/*Compat.ftl"
|
||||
l10n = "{l}toolkit/toolkit/about/*Compat.ftl"
|
||||
|
||||
[[paths]]
|
||||
reference = "toolkit/locales/en-US/chrome/global/charsetMenu.properties"
|
||||
l10n = "{l}toolkit/chrome/global/charsetMenu.properties"
|
||||
|
||||
[[paths]]
|
||||
reference = "toolkit/locales/en-US/chrome/global/commonDialogs.properties"
|
||||
l10n = "{l}toolkit/chrome/global/commonDialogs.properties"
|
||||
|
|
|
@ -5581,6 +5581,24 @@
|
|||
# Prefs starting with "intl."
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
# Whether the new encoding detector is enabled for the .jp TLD.
|
||||
- name: intl.charset.detector.ng.jp.enabled
|
||||
type: bool
|
||||
value: true
|
||||
mirror: always
|
||||
|
||||
# Whether the new encoding detector is enabled for the .in TLD.
|
||||
- name: intl.charset.detector.ng.in.enabled
|
||||
type: bool
|
||||
value: true
|
||||
mirror: always
|
||||
|
||||
# Whether the new encoding detector is enabled for the .lk TLD.
|
||||
- name: intl.charset.detector.ng.lk.enabled
|
||||
type: bool
|
||||
value: true
|
||||
mirror: always
|
||||
|
||||
# If true, dispatch the keydown and keyup events on any web apps even during
|
||||
# composition.
|
||||
- name: intl.ime.hack.on_any_apps.fire_key_events_for_composition
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "mozilla/DebugOnly.h"
|
||||
#include "mozilla/Encoding.h"
|
||||
#include "mozilla/EncodingDetector.h"
|
||||
#include "mozilla/JapaneseDetector.h"
|
||||
#include "mozilla/Likely.h"
|
||||
#include "mozilla/Maybe.h"
|
||||
#include "mozilla/SchedulerGroup.h"
|
||||
|
@ -306,6 +307,9 @@ int32_t nsHtml5StreamParser::MaybeRollBackSource(int32_t aSource) {
|
|||
}
|
||||
|
||||
void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
|
||||
if (mJapaneseDetector) {
|
||||
return;
|
||||
}
|
||||
if (aInitial) {
|
||||
if (!mDetectorHasSeenNonAscii) {
|
||||
mDetectorHadOnlySeenAsciiWhenFirstGuessing = true;
|
||||
|
@ -316,6 +320,7 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
|
|||
bool forced = (mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection);
|
||||
MOZ_ASSERT(
|
||||
mCharsetSource != kCharsetFromFinalJapaneseAutoDetection &&
|
||||
mCharsetSource != kCharsetFromFinalUserForcedAutoDetection &&
|
||||
mCharsetSource != kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8 &&
|
||||
mCharsetSource !=
|
||||
|
@ -380,7 +385,8 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
|
|||
mCharsetSource = MaybeRollBackSource(source);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else {
|
||||
MOZ_ASSERT(mCharsetSource < kCharsetFromXmlDeclarationUtf16 || forced);
|
||||
MOZ_ASSERT(mCharsetSource < kCharsetFromFinalJapaneseAutoDetection ||
|
||||
forced);
|
||||
// We've already committed to a decoder. Request a reload from the
|
||||
// docshell.
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(encoding, source, 0);
|
||||
|
@ -401,9 +407,44 @@ void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
|
|||
}
|
||||
}
|
||||
|
||||
void nsHtml5StreamParser::FeedJapaneseDetector(Span<const uint8_t> aBuffer,
|
||||
bool aLast) {
|
||||
MOZ_ASSERT(!mDecodingLocalFileWithoutTokenizing);
|
||||
const Encoding* detected = mJapaneseDetector->Feed(aBuffer, aLast);
|
||||
if (!detected) {
|
||||
return;
|
||||
}
|
||||
DontGuessEncoding();
|
||||
int32_t source = kCharsetFromFinalJapaneseAutoDetection;
|
||||
if (mCharsetSource == kCharsetFromUserForced) {
|
||||
source = kCharsetFromUserForcedJapaneseAutoDetection;
|
||||
}
|
||||
if (detected == mEncoding) {
|
||||
MOZ_ASSERT(mCharsetSource < source, "Why are we running chardet at all?");
|
||||
mCharsetSource = source;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else if (HasDecoder()) {
|
||||
// We've already committed to a decoder. Request a reload from the
|
||||
// docshell.
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(detected), source, 0);
|
||||
FlushTreeOpsAndDisarmTimer();
|
||||
Interrupt();
|
||||
} else {
|
||||
// Got a confident answer from the sniffing buffer. That code will
|
||||
// take care of setting up the decoder.
|
||||
mEncoding = WrapNotNull(detected);
|
||||
mCharsetSource = source;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
}
|
||||
}
|
||||
|
||||
void nsHtml5StreamParser::FeedDetector(Span<const uint8_t> aBuffer,
|
||||
bool aLast) {
|
||||
mDetectorHasSeenNonAscii = mDetector->Feed(aBuffer, aLast);
|
||||
if (mJapaneseDetector) {
|
||||
FeedJapaneseDetector(aBuffer, aLast);
|
||||
} else {
|
||||
mDetectorHasSeenNonAscii = mDetector->Feed(aBuffer, aLast);
|
||||
}
|
||||
}
|
||||
|
||||
void nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) {
|
||||
|
@ -450,12 +491,13 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
|||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
nsresult rv = NS_OK;
|
||||
if (mDecodingLocalFileWithoutTokenizing &&
|
||||
mCharsetSource <= kCharsetFromFallback) {
|
||||
mCharsetSource <= kCharsetFromTopLevelDomain) {
|
||||
MOZ_ASSERT(mEncoding != UTF_8_ENCODING);
|
||||
mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
|
||||
} else {
|
||||
if (mCharsetSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8) {
|
||||
if (!(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
if (mCharsetSource >= kCharsetFromFinalJapaneseAutoDetection) {
|
||||
if (!(mCharsetSource == kCharsetFromUserForced ||
|
||||
mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection)) {
|
||||
DontGuessEncoding();
|
||||
}
|
||||
|
@ -639,7 +681,7 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
|||
uint32_t aCountToSniffingLimit,
|
||||
bool aEof) {
|
||||
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
|
||||
MOZ_ASSERT(mCharsetSource < kCharsetFromXmlDeclarationUtf16,
|
||||
MOZ_ASSERT(mCharsetSource < kCharsetFromUserForcedJapaneseAutoDetection,
|
||||
"Should not finalize sniffing with strong decision already made.");
|
||||
if (mMode == VIEW_SOURCE_XML) {
|
||||
static const XML_Memory_Handling_Suite memsuite = {
|
||||
|
@ -702,9 +744,12 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
|||
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
bool forced = (mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection);
|
||||
bool forced =
|
||||
(mCharsetSource == kCharsetFromUserForced ||
|
||||
mCharsetSource == kCharsetFromUserForcedJapaneseAutoDetection ||
|
||||
mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection);
|
||||
if (!mChannelHadCharset &&
|
||||
(forced || mCharsetSource < kCharsetFromMetaPrescan) &&
|
||||
(mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {
|
||||
|
@ -731,7 +776,17 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
|||
if (forced &&
|
||||
(encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
|
||||
// Honor override
|
||||
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
|
||||
if (mCharsetSource == kCharsetFromUserForced &&
|
||||
mEncoding->IsJapaneseLegacy()) {
|
||||
mFeedChardet = true;
|
||||
if (!mJapaneseDetector) {
|
||||
mJapaneseDetector = mozilla::JapaneseDetector::Create(true);
|
||||
}
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit,
|
||||
false);
|
||||
} else if (mCharsetSource ==
|
||||
kCharsetFromUserForcedJapaneseAutoDetection ||
|
||||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
|
||||
DontGuessEncoding();
|
||||
} else {
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit,
|
||||
|
@ -753,7 +808,15 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
|||
}
|
||||
if (forced && mCharsetSource != kCharsetFromIrreversibleAutoDetection) {
|
||||
// neither meta nor XML declaration found, honor override
|
||||
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
|
||||
if (mCharsetSource == kCharsetFromUserForced &&
|
||||
mEncoding->IsJapaneseLegacy()) {
|
||||
mFeedChardet = true;
|
||||
if (!mJapaneseDetector) {
|
||||
mJapaneseDetector = mozilla::JapaneseDetector::Create(true);
|
||||
}
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, false);
|
||||
} else if (mCharsetSource == kCharsetFromUserForcedJapaneseAutoDetection ||
|
||||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
|
||||
DontGuessEncoding();
|
||||
} else {
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, false);
|
||||
|
@ -958,7 +1021,9 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
// this is the last buffer
|
||||
uint32_t countToSniffingLimit = SNIFFING_BUFFER_SIZE - mSniffingLength;
|
||||
bool forced =
|
||||
(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
(mCharsetSource == kCharsetFromUserForced ||
|
||||
mCharsetSource == kCharsetFromUserForcedJapaneseAutoDetection ||
|
||||
mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection);
|
||||
if (!mChannelHadCharset && (mMode == NORMAL || mMode == VIEW_SOURCE_HTML ||
|
||||
|
@ -980,7 +1045,18 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
if (forced && (encoding->IsAsciiCompatible() ||
|
||||
encoding == ISO_2022_JP_ENCODING)) {
|
||||
// Honor override
|
||||
if (mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) {
|
||||
if (mCharsetSource == kCharsetFromUserForced &&
|
||||
mEncoding->IsJapaneseLegacy()) {
|
||||
mFeedChardet = true;
|
||||
if (!mJapaneseDetector) {
|
||||
mJapaneseDetector = mozilla::JapaneseDetector::Create(true);
|
||||
}
|
||||
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
|
||||
false);
|
||||
} else if (mCharsetSource ==
|
||||
kCharsetFromUserForcedJapaneseAutoDetection ||
|
||||
mCharsetSource ==
|
||||
kCharsetFromFinalUserForcedAutoDetection) {
|
||||
DontGuessEncoding();
|
||||
} else {
|
||||
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
|
||||
|
@ -1015,7 +1091,9 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
}
|
||||
if (encoding) {
|
||||
// meta scan successful; honor overrides unless meta is XSS-dangerous
|
||||
if ((mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) &&
|
||||
if ((mCharsetSource == kCharsetFromUserForced ||
|
||||
mCharsetSource == kCharsetFromUserForcedJapaneseAutoDetection ||
|
||||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection) &&
|
||||
(encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
|
||||
// Honor override
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
|
@ -1215,7 +1293,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
|
|||
// let's instantiate only if we make it out of this method with the
|
||||
// intent to use it.
|
||||
auto detectorCreator = MakeScopeExit([&] {
|
||||
if (mFeedChardet) {
|
||||
if (mFeedChardet && !mJapaneseDetector) {
|
||||
mDetector = mozilla::EncodingDetector::Create();
|
||||
}
|
||||
});
|
||||
|
@ -1243,7 +1321,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
|
|||
nsresult rv = GetChannel(getter_AddRefs(channel));
|
||||
if (NS_SUCCEEDED(rv)) {
|
||||
isSrcdoc = NS_IsSrcdocChannel(channel);
|
||||
if (!isSrcdoc && mCharsetSource <= kCharsetFromFallback) {
|
||||
if (!isSrcdoc && mCharsetSource <= kCharsetFromTopLevelDomain) {
|
||||
nsCOMPtr<nsIURI> originalURI;
|
||||
rv = channel->GetOriginalURI(getter_AddRefs(originalURI));
|
||||
if (NS_SUCCEEDED(rv)) {
|
||||
|
@ -1379,8 +1457,44 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
|
|||
if (!(mCharsetSource == kCharsetFromPendingUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromInitialUserForcedAutoDetection ||
|
||||
mCharsetSource == kCharsetFromFinalUserForcedAutoDetection)) {
|
||||
if (mCharsetSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8) {
|
||||
DontGuessEncoding();
|
||||
if (mCharsetSource >= kCharsetFromFinalJapaneseAutoDetection) {
|
||||
if ((mCharsetSource == kCharsetFromUserForced) &&
|
||||
mEncoding->IsJapaneseLegacy()) {
|
||||
// Japanese detector only
|
||||
if (!mJapaneseDetector) {
|
||||
mJapaneseDetector = mozilla::JapaneseDetector::Create(true);
|
||||
}
|
||||
mGuessEncoding = false;
|
||||
} else {
|
||||
DontGuessEncoding();
|
||||
}
|
||||
}
|
||||
|
||||
// Compute various pref-based special cases
|
||||
if (!mDecodingLocalFileWithoutTokenizing && mFeedChardet) {
|
||||
if (mTLD.EqualsLiteral("jp")) {
|
||||
if (!mJapaneseDetector &&
|
||||
!StaticPrefs::intl_charset_detector_ng_jp_enabled()) {
|
||||
mJapaneseDetector = mozilla::JapaneseDetector::Create(true);
|
||||
}
|
||||
if (mJapaneseDetector && mEncoding == WINDOWS_1252_ENCODING &&
|
||||
mCharsetSource <= kCharsetFromTopLevelDomain) {
|
||||
mCharsetSource = kCharsetFromTopLevelDomain;
|
||||
mEncoding = SHIFT_JIS_ENCODING;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
}
|
||||
} else if ((mTLD.EqualsLiteral("in") &&
|
||||
!StaticPrefs::intl_charset_detector_ng_in_enabled()) ||
|
||||
(mTLD.EqualsLiteral("lk") &&
|
||||
!StaticPrefs::intl_charset_detector_ng_lk_enabled())) {
|
||||
if (mEncoding == WINDOWS_1252_ENCODING &&
|
||||
mCharsetSource <= kCharsetFromTopLevelDomain) {
|
||||
// Avoid breaking font hacks that Chrome doesn't break.
|
||||
mCharsetSource = kCharsetFromTopLevelDomain;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
}
|
||||
DontGuessEncoding();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -45,6 +45,7 @@ class nsIURI;
|
|||
|
||||
namespace mozilla {
|
||||
class EncodingDetector;
|
||||
class JapaneseDetector;
|
||||
template <typename T>
|
||||
class Buffer;
|
||||
|
||||
|
@ -218,6 +219,11 @@ class nsHtml5StreamParser final : public nsISupports {
|
|||
|
||||
// Not from an external interface
|
||||
|
||||
/**
|
||||
* Pass a buffer to the JapaneseDetector.
|
||||
*/
|
||||
void FeedJapaneseDetector(mozilla::Span<const uint8_t> aBuffer, bool aLast);
|
||||
|
||||
/**
|
||||
* Pass a buffer to the Japanese or Cyrillic detector as appropriate.
|
||||
*/
|
||||
|
@ -652,6 +658,11 @@ class nsHtml5StreamParser final : public nsISupports {
|
|||
|
||||
nsCOMPtr<nsIRunnable> mLoadFlusher;
|
||||
|
||||
/**
|
||||
* The Japanese detector.
|
||||
*/
|
||||
mozilla::UniquePtr<mozilla::JapaneseDetector> mJapaneseDetector;
|
||||
|
||||
/**
|
||||
* The generic detector.
|
||||
*/
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
enum {
|
||||
kCharsetUninitialized,
|
||||
kCharsetFromFallback,
|
||||
kCharsetFromTopLevelDomain,
|
||||
kCharsetFromDocTypeDefault, // This and up confident for XHR
|
||||
// Start subdividing source for telemetry purposes
|
||||
kCharsetFromInitialAutoDetectionASCII,
|
||||
|
@ -16,6 +17,7 @@ enum {
|
|||
kCharsetFromInitialAutoDetectionWouldNotHaveBeenUTF8Generic,
|
||||
kCharsetFromInitialAutoDetectionWouldNotHaveBeenUTF8Content,
|
||||
kCharsetFromInitialAutoDetectionWouldNotHaveBeenUTF8DependedOnTLD,
|
||||
kCharsetFromFinalJapaneseAutoDetection,
|
||||
// Deliberately no Final version of ASCII
|
||||
kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8,
|
||||
kCharsetFromFinalAutoDetectionWouldNotHaveBeenUTF8Generic,
|
||||
|
@ -35,6 +37,8 @@ enum {
|
|||
// later
|
||||
kCharsetFromInitialUserForcedAutoDetection,
|
||||
kCharsetFromFinalUserForcedAutoDetection,
|
||||
kCharsetFromUserForced, // propagates to child frames
|
||||
kCharsetFromUserForcedJapaneseAutoDetection,
|
||||
kCharsetFromXmlDeclarationUtf16, // This one is overridden by
|
||||
// kCharsetFromChannel
|
||||
kCharsetFromIrreversibleAutoDetection, // This one is overridden by
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"f9f41b76ecbe257a312ab09ed1208189b8dc9952d12d17a216fe2846d1d471c8","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"b7148745a7ef59788e76fbe638d4b41c54dcaa1313a809f4630a020645f892a8","examples/detect.rs":"eb7239ccc802290ef24331db600ca1226198801dd86df86876b4b738ef4b8470","src/lib.rs":"f2a83db125d553af5c6fabae0487ef211aad62f2d93c4418dc510cbd425d472a"},"package":"f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"}
|
|
@ -0,0 +1,38 @@
|
|||
If you send a pull request / patch, please observe the following.
|
||||
|
||||
## Licensing
|
||||
|
||||
Since this crate is dual-licensed,
|
||||
[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
|
||||
is considered to apply in the sense of Contributions being automatically
|
||||
under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
|
||||
That is, by the act of offering a Contribution, you place your Contribution
|
||||
under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
|
||||
file. Please do not contribute if you aren't willing or allowed to license your
|
||||
contributions in this manner.
|
||||
|
||||
You are encouraged to dedicate test code that you contribute to the Public
|
||||
Domain using the CC0 dedication. If you contribute test code that is not
|
||||
dedicated to the Public Domain, please be sure not to put it in a part of
|
||||
source code that the comments designate as being dedicated to the Public
|
||||
Domain.
|
||||
|
||||
## Copyright Notices
|
||||
|
||||
If you require the addition of your copyright notice, it's up to you to edit in
|
||||
your notice as part of your Contribution. Not adding a copyright notice is
|
||||
taken as a waiver of copyright notice.
|
||||
|
||||
## Compatibility with Stable Rust
|
||||
|
||||
Please ensure that your Contribution compiles with the latest stable-channel
|
||||
rustc.
|
||||
|
||||
## rustfmt
|
||||
|
||||
The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
|
||||
use that version or avoid using `rustfmt` (so as not to reformat all the code).
|
||||
|
||||
## Unit tests
|
||||
|
||||
Please ensure that `cargo test` succeeds.
|
|
@ -0,0 +1,9 @@
|
|||
shift_or_euc is copyright 2018 Mozilla Foundation.
|
||||
|
||||
Licensed under the Apache License, Version 2.0
|
||||
<LICENSE-APACHE or
|
||||
https://www.apache.org/licenses/LICENSE-2.0> or the MIT
|
||||
license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
|
||||
at your option. All files in the project carrying such
|
||||
notice may not be copied, modified, or distributed except
|
||||
according to those terms.
|
|
@ -0,0 +1,30 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "shift_or_euc"
|
||||
version = "0.1.0"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "Detects among the Japanese legacy encodings"
|
||||
homepage = "https://docs.rs/shift_or_euc/"
|
||||
documentation = "https://docs.rs/shift_or_euc/"
|
||||
readme = "README.md"
|
||||
keywords = ["encoding", "web", "charset"]
|
||||
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
|
||||
license = "MIT/Apache-2.0"
|
||||
repository = "https://github.com/hsivonen/shift_or_euc"
|
||||
[dependencies.encoding_rs]
|
||||
version = "0.8.17"
|
||||
|
||||
[dependencies.memchr]
|
||||
version = "2.2.0"
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2018 Mozilla Foundation
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,73 @@
|
|||
# shift_or_euc
|
||||
|
||||
[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT)
|
||||
|
||||
A Japanese legacy encoding detector for detecting between Shift_JIS, EUC-JP,
|
||||
and, optionally, ISO-2022-JP _given_ the assumption that the encoding is one
|
||||
of those.
|
||||
|
||||
This detector is generally more accurate (but see below about the failure
|
||||
mode on half-width katakana) and decides much sooner than machine
|
||||
learning-based detectors. To decide EUC-JP, machine learning-based detectors
|
||||
try to gain confidence that the input looks like EUC-JP. To decide EUC-JP,
|
||||
this detector instead looks for two simple rule-based signs of the input not
|
||||
being Shift_JIS.
|
||||
|
||||
As a consequence of not containing machine learning tables, the binary size
|
||||
footprint that this crate adds on top of
|
||||
[`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny.
|
||||
|
||||
## Documentation
|
||||
|
||||
[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc)
|
||||
|
||||
## Licensing
|
||||
|
||||
See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
|
||||
|
||||
## Sample Program Usage
|
||||
|
||||
1. [Install Rust](https://rustup.rs/)
|
||||
2. `git clone https://github.com/hsivonen/shift_or_euc`
|
||||
3. `cd shift_or_euc`
|
||||
4. `cargo run --example detect PATH_TO_FILE`
|
||||
|
||||
The program prints one of:
|
||||
|
||||
* Shift_JIS
|
||||
* EUC-JP
|
||||
* ISO-2022-JP
|
||||
* Undecided
|
||||
|
||||
## Principle of Operation
|
||||
|
||||
The detector is based on two observations:
|
||||
|
||||
1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
|
||||
EUC-JP, so encountering such an escape sequence (before non-ASCII has been
|
||||
encountered) can be taken as indication of ISO-2022-JP.
|
||||
2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
|
||||
decoded as EUC-JP, or vice versa, the result is either an error or half-width
|
||||
katakana, and it's very uncommon for Japanese HTML to have half-width katakana
|
||||
character before a normal kana or common kanji character. Therefore, if
|
||||
decoding as Shift_JIS results in error or half-width katakana, the detector
|
||||
decides that the content is EUC-JP, and vice versa.
|
||||
|
||||
## Failure Modes
|
||||
|
||||
The detector gives the wrong answer if the text has a half-width katakana
|
||||
character before normal kana or common kanji. Some uncommon kanji are
|
||||
undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
|
||||
|
||||
The half-width katakana issue is mainly relevant for old 8-bit JIS X 0201-only
|
||||
text files that would decode correctly as Shift_JIS but that the detector
|
||||
detects as EUC-JP.
|
||||
|
||||
The undecidable kanji issue does not realistically show up when a full
|
||||
document is fed to the detector, because, realistically, in a full document,
|
||||
there is at least one kana or common kanji. It can occur, though, if the
|
||||
detector is only run on a prefix of a document and the prefix only contains
|
||||
the title of the document. It is possible for a document title to consist
|
||||
entirely of undecidable kanji. (Indeed, Japanese Wikipedia has articles with
|
||||
such titles.) If the detector is undecided, falling back to Shift_JIS is
|
||||
typically the better guess for Web content.
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
use shift_or_euc::Detector;
|
||||
|
||||
fn main() {
|
||||
let mut args = std::env::args_os();
|
||||
if args.next().is_none() {
|
||||
eprintln!("Error: Program name missing from arguments.");
|
||||
std::process::exit(-1);
|
||||
}
|
||||
if let Some(path) = args.next() {
|
||||
if args.next().is_some() {
|
||||
eprintln!("Error: Too many arguments.");
|
||||
std::process::exit(-3);
|
||||
}
|
||||
if let Ok(mut file) = File::open(path) {
|
||||
let mut buffer = [0u8; 4096];
|
||||
let mut detector = Detector::new(true);
|
||||
loop {
|
||||
if let Ok(num_read) = file.read(&mut buffer[..]) {
|
||||
let opt_enc = if num_read == 0 {
|
||||
detector.feed(b"", true)
|
||||
} else {
|
||||
detector.feed(&buffer[..num_read], false)
|
||||
};
|
||||
if let Some(encoding) = opt_enc {
|
||||
println!("{}", encoding.name());
|
||||
return;
|
||||
} else if num_read == 0 {
|
||||
println!("Undecided");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
eprintln!("Error: Error reading file.");
|
||||
std::process::exit(-5);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("Error: Could not open file.");
|
||||
std::process::exit(-4);
|
||||
}
|
||||
} else {
|
||||
eprintln!("Error: One path argument needed.");
|
||||
std::process::exit(-2);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,278 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![doc(html_root_url = "https://docs.rs/shift_or_euc/0.1.0")]
|
||||
|
||||
//! A Japanese legacy encoding detector for detecting between Shift_JIS,
|
||||
//! EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the
|
||||
//! encoding is one of those.
|
||||
//!
|
||||
//! This detector is generally more accurate (but see below about the failure
|
||||
//! mode on half-width katakana) and decides much sooner than machine
|
||||
//! learning-based detectors. To decide EUC-JP, machine learning-based
|
||||
//! detectors try to gain confidence that the input looks like EUC-JP. To
|
||||
//! decide EUC-JP, this detector instead looks for two simple rule-based
|
||||
//! signs of the input not being Shift_JIS.
|
||||
//!
|
||||
//! As a consequence of not containing machine learning tables, the binary
|
||||
//! size footprint that this crate adds on top of
|
||||
//! [`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny.
|
||||
//!
|
||||
//! # Licensing
|
||||
//!
|
||||
//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
|
||||
//!
|
||||
//! # Principle of Operation
|
||||
//!
|
||||
//! The detector is based on two observations:
|
||||
//!
|
||||
//! 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
|
||||
//! EUC-JP, so encountering such an escape sequence (before non-ASCII has been
|
||||
//! encountered) can be taken as indication of ISO-2022-JP.
|
||||
//! 2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
|
||||
//! decoded as EUC-JP, or vice versa, the result is either an error or
|
||||
//! half-width katakana, and it's very uncommon for Japanese HTML to have
|
||||
//! half-width katakana character before a normal kana or common kanji
|
||||
//! character. Therefore, if decoding as Shift_JIS results in error or
|
||||
//! half-width katakana, the detector decides that the content is EUC-JP, and
|
||||
//! vice versa.
|
||||
//!
|
||||
//! # Failure Modes
|
||||
//!
|
||||
//! The detector gives the wrong answer if the text has a half-width katakana
|
||||
//! character before normal kana or common kanji. Some uncommon kanji are
|
||||
//! undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
|
||||
//!
|
||||
//! The half-width katakana issue is mainly relevant for old 8-bit JIS X
|
||||
//! 0201-only text files that would decode correctly as Shift_JIS but that the
|
||||
//! detector detects as EUC-JP.
|
||||
//!
|
||||
//! The undecidable kanji issue does not realistically show up when a full
|
||||
//! document is fed to the detector, because, realistically, in a full
|
||||
//! document, there is at least one kana or common kanji. It can occur,
|
||||
//! though, if the detector is only run on a prefix of a document and the
|
||||
//! prefix only contains the title of the document. It is possible for
|
||||
//! document title to consist entirely of undecidable kanji. (Indeed,
|
||||
//! Japanese Wikipedia has articles with such titles.) If the detector is
|
||||
//! undecided, falling back to Shift_JIS is typically the Web oriented better
|
||||
//! guess.
|
||||
|
||||
use encoding_rs::Decoder;
|
||||
use encoding_rs::DecoderResult;
|
||||
use encoding_rs::Encoding;
|
||||
use encoding_rs::EUC_JP;
|
||||
use encoding_rs::ISO_2022_JP;
|
||||
use encoding_rs::SHIFT_JIS;
|
||||
|
||||
/// Returns the index of the first non-ASCII byte or the first
|
||||
/// 0x1B, whichever comes first, or the length of the buffer
|
||||
/// if neither is found.
|
||||
fn find_non_ascii_or_escape(buffer: &[u8]) -> usize {
|
||||
let ascii_up_to = Encoding::ascii_valid_up_to(buffer);
|
||||
if let Some(escape) = memchr::memchr(0x1B, &buffer[..ascii_up_to]) {
|
||||
escape
|
||||
} else {
|
||||
ascii_up_to
|
||||
}
|
||||
}
|
||||
|
||||
/// Feed decoder with one byte (if `last` is `false`) or EOF (if `last` is
|
||||
/// `true`). `byte` is ignored if `last` is `true`.
|
||||
/// Returns `true` if there was no rejection or `false` upon rejecting the
|
||||
/// encoding hypothesis represented by this decoder.
|
||||
#[inline(always)]
|
||||
fn feed_decoder(decoder: &mut Decoder, byte: u8, last: bool) -> bool {
|
||||
let mut output = [0u16; 1];
|
||||
let input = [byte];
|
||||
let (result, _read, written) = decoder.decode_to_utf16_without_replacement(
|
||||
if last { b"" } else { &input },
|
||||
&mut output,
|
||||
last,
|
||||
);
|
||||
match result {
|
||||
DecoderResult::InputEmpty => {
|
||||
if written == 1 {
|
||||
match output[0] {
|
||||
0xFF61...0xFF9F => {
|
||||
return false;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
DecoderResult::Malformed(_, _) => {
|
||||
return false;
|
||||
}
|
||||
DecoderResult::OutputFull => {
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// A detector for detecting the character encoding of input on the
|
||||
/// precondition that the encoding is a Japanese legacy encoding.
|
||||
pub struct Detector {
|
||||
shift_jis_decoder: Decoder,
|
||||
euc_jp_decoder: Decoder,
|
||||
second_byte_in_escape: u8,
|
||||
iso_2022_jp_disqualified: bool,
|
||||
escape_seen: bool,
|
||||
finished: bool,
|
||||
}
|
||||
|
||||
impl Detector {
|
||||
/// Instantiates the detector. If `allow_2022` is `true` the possible
|
||||
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
/// and undecided.
|
||||
pub fn new(allow_2022: bool) -> Self {
|
||||
Detector {
|
||||
shift_jis_decoder: SHIFT_JIS.new_decoder_without_bom_handling(),
|
||||
euc_jp_decoder: EUC_JP.new_decoder_without_bom_handling(),
|
||||
second_byte_in_escape: 0,
|
||||
iso_2022_jp_disqualified: !allow_2022,
|
||||
escape_seen: false,
|
||||
finished: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Feeds bytes to the detector. If `last` is `true` the end of the stream
|
||||
/// is considered to occur immediately after the end of `buffer`.
|
||||
/// Otherwise, the stream is expected to continue. `buffer` may be empty.
|
||||
///
|
||||
/// If you're running the detector only on a prefix of a complete
|
||||
/// document, _do not_ pass `last` as `true` after the prefix if the
|
||||
/// stream as a whole still contains more content.
|
||||
///
|
||||
/// Returns `Some(encoding_rs::SHIFT_JIS)` if the detector guessed
|
||||
/// Shift_JIS. Returns `Some(encoding_rs::EUC_JP)` if the detector
|
||||
/// guessed EUC-JP. Returns `Some(encoding_rs::ISO_2022_JP)` if the
|
||||
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
/// `allow_2022` when instantiating the detector). Returns `None` if the
|
||||
/// detector is undecided. If `None` is returned even when passing `true`
|
||||
/// as `last`, falling back to Shift_JIS is the best guess for Web
|
||||
/// purposes.
|
||||
///
|
||||
/// Do not call again after the method has returned `Some(_)` or after
|
||||
/// the method has been called with `true` as `last`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If called after the method has returned `Some(_)` or after the method
|
||||
/// has been called with `true` as `last`.
|
||||
pub fn feed(&mut self, buffer: &[u8], last: bool) -> Option<&'static Encoding> {
|
||||
assert!(
|
||||
!self.finished,
|
||||
"Tried to used a detector that has finished."
|
||||
);
|
||||
self.finished = true; // Will change back to false unless we return early
|
||||
let mut i = 0;
|
||||
if !self.iso_2022_jp_disqualified {
|
||||
if !self.escape_seen {
|
||||
i = find_non_ascii_or_escape(buffer);
|
||||
}
|
||||
while i < buffer.len() {
|
||||
let byte = buffer[i];
|
||||
if byte > 0x7F {
|
||||
self.iso_2022_jp_disqualified = true;
|
||||
break;
|
||||
}
|
||||
if !self.escape_seen && byte == 0x1B {
|
||||
self.escape_seen = true;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if self.escape_seen && self.second_byte_in_escape == 0 {
|
||||
self.second_byte_in_escape = byte;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
match (self.second_byte_in_escape, byte) {
|
||||
(0x28, 0x42) | (0x28, 0x4A) | (0x28, 0x49) | (0x24, 0x40) | (0x24, 0x42) => {
|
||||
return Some(ISO_2022_JP);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
if self.escape_seen {
|
||||
self.iso_2022_jp_disqualified = true;
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
for &byte in &buffer[i..] {
|
||||
if !feed_decoder(&mut self.euc_jp_decoder, byte, false) {
|
||||
return Some(SHIFT_JIS);
|
||||
}
|
||||
if !feed_decoder(&mut self.shift_jis_decoder, byte, false) {
|
||||
return Some(EUC_JP);
|
||||
}
|
||||
}
|
||||
if last {
|
||||
if !feed_decoder(&mut self.euc_jp_decoder, 0, true) {
|
||||
return Some(SHIFT_JIS);
|
||||
}
|
||||
if !feed_decoder(&mut self.shift_jis_decoder, 0, true) {
|
||||
return Some(EUC_JP);
|
||||
}
|
||||
return None;
|
||||
}
|
||||
self.finished = false;
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// Any copyright to the test code below this comment is dedicated to the
|
||||
// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds `input` as a complete stream to a fresh detector that allows
    /// ISO-2022-JP and returns the detector's guess.
    fn guess_whole_stream(input: &[u8]) -> Option<&'static Encoding> {
        let mut detector = Detector::new(true);
        detector.feed(input, true)
    }

    #[test]
    fn test_iso_2022_jp() {
        assert_eq!(
            guess_whole_stream(b"abc\x1B\x28\x42\xFF"),
            Some(ISO_2022_JP)
        );
    }

    #[test]
    fn test_error_precedence() {
        assert_eq!(guess_whole_stream(b"abc\xFF"), Some(SHIFT_JIS));
    }

    #[test]
    fn test_invalid_euc_jp() {
        assert_eq!(guess_whole_stream(b"abc\x81\x40"), Some(SHIFT_JIS));
    }

    #[test]
    fn test_invalid_shift_jis() {
        assert_eq!(guess_whole_stream(b"abc\xEB\xA8"), Some(EUC_JP));
    }

    #[test]
    fn test_invalid_shift_jis_before_invalid_euc_jp() {
        assert_eq!(guess_whole_stream(b"abc\xEB\xA8\x81\x40"), Some(EUC_JP));
    }

    #[test]
    fn test_undecided() {
        // Feeding ASCII twice without `last` must stay undecided and must
        // leave the detector usable for the second call.
        let mut detector = Detector::new(true);
        for _ in 0..2 {
            assert_eq!(detector.feed(b"abc", false), None);
        }
    }
}
|
|
@ -0,0 +1 @@
|
|||
{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"342e5345f4fb433b89f397b07e4e7162376b30cbbc1d6f6ccb11523116e6ed6b","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"a323f1f4537bc7b3f9b3b216c8ac5041b83aa0321f5349a52627aade947c6272","include/shift_or_euc.h":"47c3b9832cb7eb8995aa37dcc2e76be7d4f5c7b3fa6b43135e579831ab449cd8","src/lib.rs":"cab1898dd6724e0a0324a1e44f6348c107f13916da8873dba69c70dbc95ba9cd"},"package":"c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"}
|
|
@ -0,0 +1,38 @@
|
|||
If you send a pull request / patch, please observe the following.
|
||||
|
||||
## Licensing
|
||||
|
||||
Since this crate is dual-licensed,
|
||||
[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
|
||||
is considered to apply in the sense of Contributions being automatically
|
||||
under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
|
||||
That is, by the act of offering a Contribution, you place your Contribution
|
||||
under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
|
||||
file. Please do not contribute if you aren't willing or allowed to license your
|
||||
contributions in this manner.
|
||||
|
||||
You are encouraged to dedicate test code that you contribute to the Public
|
||||
Domain using the CC0 dedication. If you contribute test code that is not
|
||||
dedicated to the Public Domain, please be sure not to put it in a part of
|
||||
source code that the comments designate as being dedicated to the Public
|
||||
Domain.
|
||||
|
||||
## Copyright Notices
|
||||
|
||||
If you require the addition of your copyright notice, it's up to you to edit in
|
||||
your notice as part of your Contribution. Not adding a copyright notice is
|
||||
taken as a waiver of copyright notice.
|
||||
|
||||
## Compatibility with Stable Rust
|
||||
|
||||
Please ensure that your Contribution compiles with the latest stable-channel
|
||||
rustc.
|
||||
|
||||
## rustfmt
|
||||
|
||||
The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
|
||||
use that version or avoid using `rustfmt` (so as not to reformat all the code).
|
||||
|
||||
## Unit tests
|
||||
|
||||
Please ensure that `cargo test` succeeds.
|
|
@ -0,0 +1,9 @@
|
|||
shift_or_euc is copyright 2018 Mozilla Foundation.
|
||||
|
||||
Licensed under the Apache License, Version 2.0
|
||||
<LICENSE-APACHE or
|
||||
https://www.apache.org/licenses/LICENSE-2.0> or the MIT
|
||||
license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
|
||||
at your option. All files in the project carrying such
|
||||
notice may not be copied, modified, or distributed except
|
||||
according to those terms.
|
|
@ -0,0 +1,30 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "shift_or_euc_c"
|
||||
version = "0.1.0"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "C API for shift_or_euc"
|
||||
homepage = "https://docs.rs/shift_or_euc_c/"
|
||||
documentation = "https://docs.rs/shift_or_euc_c/"
|
||||
readme = "README.md"
|
||||
keywords = ["encoding", "web", "charset"]
|
||||
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
|
||||
license = "MIT/Apache-2.0"
|
||||
repository = "https://github.com/hsivonen/shift_or_euc_c"
|
||||
[dependencies.encoding_rs]
|
||||
version = "0.8.17"
|
||||
|
||||
[dependencies.shift_or_euc]
|
||||
version = "0.1.0"
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2018 Mozilla Foundation
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,13 @@
|
|||
# shift_or_euc_c
|
||||
|
||||
[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT)
|
||||
|
||||
C API for [`shift_or_euc`](https://docs.rs/crate/shift_or_euc).
|
||||
|
||||
## Documentation
|
||||
|
||||
[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc_c)
|
||||
|
||||
## Licensing
|
||||
|
||||
See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT).
|
|
@ -0,0 +1,88 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#ifndef shift_or_euc_h
|
||||
#define shift_or_euc_h
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include "encoding_rs.h"
|
||||
|
||||
#ifndef SHIFT_OR_EUC_DETECTOR
|
||||
#define SHIFT_OR_EUC_DETECTOR Detector
|
||||
#ifndef __cplusplus
|
||||
typedef struct Detector_ Detector;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/// Instantiates the detector. If `allow_2022` is `true` the possible
|
||||
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
/// and undecided.
|
||||
///
|
||||
/// The instantiated detector must be freed after use using
|
||||
/// `shift_or_euc_detector_free`.
|
||||
SHIFT_OR_EUC_DETECTOR* shift_or_euc_detector_new(bool allow_2022);
|
||||
|
||||
/// Deallocates a detector obtained from `shift_or_euc_detector_new`.
|
||||
void shift_or_euc_detector_free(SHIFT_OR_EUC_DETECTOR* detector);
|
||||
|
||||
/// Feeds bytes to the detector. If `last` is `true` the end of the stream
|
||||
/// is considered to occur immediately after the end of `buffer`.
|
||||
/// Otherwise, the stream is expected to continue. `buffer_len` may be zero.
|
||||
/// `buffer` must not be `NULL` but may be undereferencable when
|
||||
/// `buffer_len` is zero.
|
||||
///
|
||||
/// If you're running the detector only on a prefix of a complete
|
||||
/// document, _do not_ pass `last` as `true` after the prefix if the
|
||||
/// stream as a whole still contains more content.
|
||||
///
|
||||
/// Returns `SHIFT_JIS_ENCODING` if the detector guessed
|
||||
/// Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
|
||||
/// guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
|
||||
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
/// `allow_2022` when instantiating the detector). Returns `NULL` if the
|
||||
/// detector is undecided. If `NULL` is returned even when passing `true`
|
||||
/// as `last`, falling back to Shift_JIS is the best guess for Web
|
||||
/// purposes.
|
||||
///
|
||||
/// Do not call again after the function has returned non-`NULL` or after
|
||||
/// the function has been called with `true` as `last`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If called after the function has returned non-`NULL` or after the
|
||||
/// function has been called with `true` as `last`.
|
||||
///
|
||||
/// # Undefined Behavior
|
||||
///
|
||||
/// UB ensues if
|
||||
///
|
||||
/// * `detector` does not point to a detector obtained from
|
||||
/// `shift_or_euc_detector_new` but not yet freed with
|
||||
/// `shift_or_euc_detector_free`.
|
||||
/// * `buffer` is `NULL`.
|
||||
/// * `buffer` and `buffer_len` don't designate a range of memory
|
||||
/// valid for reading.
|
||||
ENCODING_RS_ENCODING const* shift_or_euc_detector_feed(
|
||||
SHIFT_OR_EUC_DETECTOR* detector,
|
||||
uint8_t const* buffer,
|
||||
size_t buffer_len,
|
||||
bool last
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // shift_or_euc_h
|
|
@ -0,0 +1,94 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![doc(html_root_url = "https://docs.rs/shift_or_euc_c/0.1.0")]
|
||||
|
||||
//! C API for [`shift_or_euc`](https://docs.rs/shift_or_euc/)
|
||||
//!
|
||||
//! # Panics
|
||||
//!
|
||||
//! This crate is designed to be used only in a `panic=abort` scenario.
|
||||
//! Panic propagation across FFI is not handled!
|
||||
//!
|
||||
//! # Licensing
|
||||
//!
|
||||
//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
|
||||
|
||||
use encoding_rs::Encoding;
|
||||
use shift_or_euc::*;
|
||||
|
||||
/// Instantiates the detector. If `allow_2022` is `true` the possible
|
||||
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
/// and undecided.
|
||||
///
|
||||
/// The instantiated detector must be freed after use using
|
||||
/// `shift_or_euc_detector_free`.
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn shift_or_euc_detector_new(allow_2022: bool) -> *mut Detector {
|
||||
Box::into_raw(Box::new(Detector::new(allow_2022)))
|
||||
}
|
||||
|
||||
/// Deallocates a detector obtained from `shift_or_euc_detector_new`.
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn shift_or_euc_detector_free(detector: *mut Detector) {
|
||||
let _ = Box::from_raw(detector);
|
||||
}
|
||||
|
||||
/// Feeds bytes to the detector. If `last` is `true` the end of the stream
|
||||
/// is considered to occur immediately after the end of `buffer`.
|
||||
/// Otherwise, the stream is expected to continue. `buffer_len` may be zero.
|
||||
/// `buffer` must not be `NULL` but may be undereferencable when
|
||||
/// `buffer_len` is zero.
|
||||
///
|
||||
/// If you're running the detector only on a prefix of a complete
|
||||
/// document, _do not_ pass `last` as `true` after the prefix if the
|
||||
/// stream as a whole still contains more content.
|
||||
///
|
||||
/// Returns `SHIFT_JIS_ENCODING` if the detector guessed
|
||||
/// Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
|
||||
/// guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
|
||||
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
/// `allow_2022` when instantiating the detector). Returns `NULL` if the
|
||||
/// detector is undecided. If `NULL` is returned even when passing `true`
|
||||
/// as `last`, falling back to Shift_JIS is the best guess for Web
|
||||
/// purposes.
|
||||
///
|
||||
/// Do not call again after the function has returned non-`NULL` or after
|
||||
/// the function has been called with `true` as `last`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If called after the function has returned non-`NULL` or after the
|
||||
/// function has been called with `true` as `last`.
|
||||
///
|
||||
/// # Undefined Behavior
|
||||
///
|
||||
/// UB ensues if
|
||||
///
|
||||
/// * `detector` does not point to a detector obtained from
|
||||
/// `shift_or_euc_detector_new` but not yet freed with
|
||||
/// `shift_or_euc_detector_free`.
|
||||
/// * `buffer` is `NULL`.
|
||||
/// * `buffer` and `buffer_len` don't designate a range of memory
|
||||
/// valid for reading.
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn shift_or_euc_detector_feed(
|
||||
detector: *mut Detector,
|
||||
buffer: *const u8,
|
||||
buffer_len: usize,
|
||||
last: bool,
|
||||
) -> *const Encoding {
|
||||
if let Some(encoding) = (*detector).feed(::std::slice::from_raw_parts(buffer, buffer_len), last)
|
||||
{
|
||||
encoding
|
||||
} else {
|
||||
::std::ptr::null()
|
||||
}
|
||||
}
|
|
@ -52,19 +52,20 @@ class ViewSourceChild extends JSWindowActorChild {
|
|||
* loading.
|
||||
*/
|
||||
viewSource(URL, outerWindowID, lineNumber) {
|
||||
let otherDocShell;
|
||||
let forceEncodingDetection = false;
|
||||
let otherDocShell, forcedCharSet;
|
||||
|
||||
if (outerWindowID) {
|
||||
let contentWindow = Services.wm.getOuterWindowWithId(outerWindowID);
|
||||
if (contentWindow) {
|
||||
otherDocShell = contentWindow.docShell;
|
||||
|
||||
forceEncodingDetection = contentWindow.windowUtils.docCharsetIsForced;
|
||||
let utils = contentWindow.windowUtils;
|
||||
let doc = contentWindow.document;
|
||||
forcedCharSet = utils.docCharsetIsForced ? doc.characterSet : null;
|
||||
}
|
||||
}
|
||||
|
||||
this.loadSource(URL, otherDocShell, lineNumber, forceEncodingDetection);
|
||||
this.loadSource(URL, otherDocShell, lineNumber, forcedCharSet);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -104,14 +105,18 @@ class ViewSourceChild extends JSWindowActorChild {
|
|||
* @param lineNumber (optional)
|
||||
* The line number to focus as soon as the source has finished
|
||||
* loading.
|
||||
* @param forceEncodingDetection (optional)
|
||||
* Force autodetection of the character encoding.
|
||||
* @param forcedCharSet (optional)
|
||||
* The document character set to use instead of the default one.
|
||||
*/
|
||||
loadSource(URL, otherDocShell, lineNumber, forceEncodingDetection) {
|
||||
loadSource(URL, otherDocShell, lineNumber, forcedCharSet) {
|
||||
const viewSrcURL = "view-source:" + URL;
|
||||
|
||||
if (forceEncodingDetection) {
|
||||
this.docShell.forceEncodingDetection();
|
||||
if (forcedCharSet) {
|
||||
try {
|
||||
this.docShell.charset = forcedCharSet;
|
||||
} catch (e) {
|
||||
/* invalid charset */
|
||||
}
|
||||
}
|
||||
|
||||
ViewSourcePageChild.setInitialLineNumber(lineNumber);
|
||||
|
|
|
@ -255,6 +255,8 @@
|
|||
|
||||
this._mayEnableCharacterEncodingMenu = null;
|
||||
|
||||
this._charsetAutodetected = false;
|
||||
|
||||
this._contentPrincipal = null;
|
||||
|
||||
this._contentPartitionedPrincipal = null;
|
||||
|
@ -583,11 +585,17 @@
|
|||
: this.contentDocument.title;
|
||||
}
|
||||
|
||||
forceEncodingDetection() {
|
||||
set characterSet(val) {
|
||||
if (this.isRemoteBrowser) {
|
||||
this.sendMessageToActor("ForceEncodingDetection", {}, "BrowserTab");
|
||||
this.sendMessageToActor(
|
||||
"UpdateCharacterSet",
|
||||
{ value: val },
|
||||
"BrowserTab"
|
||||
);
|
||||
this._characterSet = val;
|
||||
} else {
|
||||
this.docShell.forceEncodingDetection();
|
||||
this.docShell.charset = val;
|
||||
this.docShell.gatherCharsetMenuTelemetry();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -607,6 +615,18 @@
|
|||
}
|
||||
}
|
||||
|
||||
get charsetAutodetected() {
|
||||
return this.isRemoteBrowser
|
||||
? this._charsetAutodetected
|
||||
: this.docShell.charsetAutodetected;
|
||||
}
|
||||
|
||||
set charsetAutodetected(aAutodetected) {
|
||||
if (this.isRemoteBrowser) {
|
||||
this._charsetAutodetected = aAutodetected;
|
||||
}
|
||||
}
|
||||
|
||||
get contentPrincipal() {
|
||||
return this.isRemoteBrowser
|
||||
? this._contentPrincipal
|
||||
|
@ -1127,6 +1147,7 @@
|
|||
aLocation,
|
||||
aCharset,
|
||||
aMayEnableCharacterEncodingMenu,
|
||||
aCharsetAutodetected,
|
||||
aDocumentURI,
|
||||
aTitle,
|
||||
aContentPrincipal,
|
||||
|
@ -1142,6 +1163,7 @@
|
|||
if (aCharset != null) {
|
||||
this._characterSet = aCharset;
|
||||
this._mayEnableCharacterEncodingMenu = aMayEnableCharacterEncodingMenu;
|
||||
this._charsetAutodetected = aCharsetAutodetected;
|
||||
}
|
||||
|
||||
if (aContentType != null) {
|
||||
|
@ -1556,6 +1578,7 @@
|
|||
"_documentContentType",
|
||||
"_characterSet",
|
||||
"_mayEnableCharacterEncodingMenu",
|
||||
"_charsetAutodetected",
|
||||
"_contentPrincipal",
|
||||
"_contentPartitionedPrincipal",
|
||||
"_isSyntheticDocument",
|
||||
|
|
|
@ -39,6 +39,7 @@ cert_storage = { path = "../../../../security/manager/ssl/cert_storage" }
|
|||
bitsdownload = { path = "../../../components/bitsdownload", optional = true }
|
||||
storage = { path = "../../../../storage/rust" }
|
||||
bookmark_sync = { path = "../../../components/places/bookmark_sync", optional = true }
|
||||
shift_or_euc_c = "0.1.0"
|
||||
chardetng_c = "0.1.1"
|
||||
audio_thread_priority = "0.23.4"
|
||||
mdns_service = { path="../../../../dom/media/webrtc/transport/mdns_service", optional = true }
|
||||
|
|
|
@ -45,6 +45,7 @@ extern crate processtools;
|
|||
#[cfg(feature = "gecko_profiler")]
|
||||
extern crate profiler_helper;
|
||||
extern crate rsdparsa_capi;
|
||||
extern crate shift_or_euc_c;
|
||||
extern crate static_prefs;
|
||||
extern crate storage;
|
||||
#[cfg(feature = "quantum_render")]
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
<!-- This Source Code Form is subject to the terms of the Mozilla Public
|
||||
- License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
- file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
|
||||
|
||||
<!ENTITY charsetMenu2.label "Text Encoding">
|
|
@ -0,0 +1,114 @@
|
|||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
# LOCALIZATION NOTE: The property keys ending with ".key" are for access keys.
|
||||
# Localizations may add or delete properties where the property key ends with
|
||||
# ".key" as appropriate for the localization. The code that uses this data can
|
||||
# deal with the absence of an access key for an item.
|
||||
#
|
||||
# For gbk, gbk.bis and gbk.bis.key are used to trigger string changes in
|
||||
# localizations.
|
||||
#
|
||||
# In the en-US version of this file, access keys are given to the following:
|
||||
# * UTF-8
|
||||
# * All encodings that are the fallback encoding for some locale in Firefox
|
||||
# * All encodings that are the fallback encoding for some locale in IE
|
||||
# * All Japanese encodings
|
||||
#
|
||||
# For the items whose property key does not end in ".key" and whose value
|
||||
# includes "(" U+0028 LEFT PARENTHESIS, the "(" character is significant for
|
||||
# processing by CharsetMenu.jsm. If your localization does not use ASCII
|
||||
# parentheses where en-US does in this file, please file a bug to make
|
||||
# CharsetMenu.jsm also recognize the delimiter your localization uses.
|
||||
# (When this code was developed, all localizations appeared to use
|
||||
# U+0028 LEFT PARENTHESIS for this purpose.)
|
||||
|
||||
# Globally-relevant
|
||||
|
||||
_autodetect_all.key = m
|
||||
_autodetect_all = Automatic
|
||||
UTF-8.key = U
|
||||
UTF-8 = Unicode
|
||||
windows-1252.key = W
|
||||
windows-1252 = Western
|
||||
|
||||
# Arabic
|
||||
windows-1256.key = A
|
||||
windows-1256 = Arabic (Windows)
|
||||
ISO-8859-6 = Arabic (ISO)
|
||||
|
||||
# Baltic
|
||||
windows-1257.key = B
|
||||
windows-1257 = Baltic (Windows)
|
||||
ISO-8859-4 = Baltic (ISO)
|
||||
|
||||
# Central European
|
||||
windows-1250.key = E
|
||||
windows-1250 = Central European (Windows)
|
||||
ISO-8859-2.key = l
|
||||
ISO-8859-2 = Central European (ISO)
|
||||
|
||||
# Chinese, Simplified
|
||||
gbk.bis.key = S
|
||||
gbk.bis = Chinese, Simplified
|
||||
|
||||
# Chinese, Traditional
|
||||
Big5.key = T
|
||||
Big5 = Chinese, Traditional
|
||||
|
||||
# Cyrillic
|
||||
windows-1251.key = C
|
||||
windows-1251 = Cyrillic (Windows)
|
||||
ISO-8859-5 = Cyrillic (ISO)
|
||||
KOI8-R = Cyrillic (KOI8-R)
|
||||
KOI8-U = Cyrillic (KOI8-U)
|
||||
IBM866 = Cyrillic (DOS)
|
||||
|
||||
# UI string in anticipation of Cyrillic analog of bug 1543077;
|
||||
# deliberately not in use yet
|
||||
|
||||
# LOCALIZATION NOTE (Cyrillic.key): If taken into use, this string will appear
|
||||
# instead of the string for windows-1251.key, so the use of the same
|
||||
# accelerator is deliberate.
|
||||
Cyrillic.key = C
|
||||
# LOCALIZATION NOTE (Cyrillic): If taken into use, this string will appear
|
||||
# as a single item instead of the five items windows-1251, ISO-8859-5,
|
||||
# KOI8-R, KOI8-U, and IBM866, so this string does not need to make sense
|
||||
# together with those strings and should be translated the way those were
|
||||
# but omitting the part in parentheses.
|
||||
Cyrillic = Cyrillic
|
||||
|
||||
# Greek
|
||||
windows-1253.key = G
|
||||
windows-1253 = Greek (Windows)
|
||||
ISO-8859-7.key = O
|
||||
ISO-8859-7 = Greek (ISO)
|
||||
|
||||
# Hebrew
|
||||
windows-1255.key = H
|
||||
windows-1255 = Hebrew
|
||||
# LOCALIZATION NOTE (ISO-8859-8): The value for this item should begin with
|
||||
# the same word for Hebrew as the value for windows-1255 so that this item
|
||||
# sorts right after that one in the collation order for your locale.
|
||||
ISO-8859-8 = Hebrew, Visual
|
||||
|
||||
# Japanese (NOT AN ENCODING NAME)
|
||||
Japanese.key = J
|
||||
Japanese = Japanese
|
||||
|
||||
# Korean
|
||||
EUC-KR.key = K
|
||||
EUC-KR = Korean
|
||||
|
||||
# Thai
|
||||
windows-874.key = i
|
||||
windows-874 = Thai
|
||||
|
||||
# Turkish
|
||||
windows-1254.key = r
|
||||
windows-1254 = Turkish
|
||||
|
||||
# Vietnamese
|
||||
windows-1258.key = V
|
||||
windows-1258 = Vietnamese
|
|
@ -14,6 +14,8 @@
|
|||
locale/@AB_CD@/global/autocomplete.properties (%chrome/global/autocomplete.properties)
|
||||
locale/@AB_CD@/global/appPicker.dtd (%chrome/global/appPicker.dtd)
|
||||
locale/@AB_CD@/global/browser.properties (%chrome/global/browser.properties)
|
||||
locale/@AB_CD@/global/charsetMenu.dtd (%chrome/global/charsetMenu.dtd)
|
||||
locale/@AB_CD@/global/charsetMenu.properties (%chrome/global/charsetMenu.properties)
|
||||
locale/@AB_CD@/global/commonDialog.dtd (%chrome/global/commonDialog.dtd)
|
||||
locale/@AB_CD@/global/commonDialogs.properties (%chrome/global/commonDialogs.properties)
|
||||
locale/@AB_CD@/global/contentAreaCommands.properties (%chrome/global/contentAreaCommands.properties)
|
||||
|
|
|
@ -0,0 +1,223 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
var EXPORTED_SYMBOLS = ["CharsetMenu"];
|
||||
|
||||
const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
|
||||
const { XPCOMUtils } = ChromeUtils.import(
|
||||
"resource://gre/modules/XPCOMUtils.jsm"
|
||||
);
|
||||
XPCOMUtils.defineLazyGetter(this, "gBundle", function() {
|
||||
const kUrl = "chrome://global/locale/charsetMenu.properties";
|
||||
return Services.strings.createBundle(kUrl);
|
||||
});
|
||||
|
||||
ChromeUtils.defineModuleGetter(
|
||||
this,
|
||||
"Deprecated",
|
||||
"resource://gre/modules/Deprecated.jsm"
|
||||
);
|
||||
|
||||
/**
 * This set contains encodings that are in the Encoding Standard, except:
 *  - Japanese encodings are represented by one autodetection item
 *  - x-user-defined, which practically never makes sense as an end-user-chosen
 *    override.
 *  - Encodings that IE11 doesn't have in its corresponding menu.
 *
 * The pinned entries are removed again right after `kPinned` is defined, so
 * after module initialization this set only holds the non-pinned items.
 */
const kEncodings = new Set([
  // Globally relevant
  "_autodetect_all", // (NOT AN ENCODING NAME; using IE-consistent magic name)
  "UTF-8",
  "windows-1252",
  // Arabic
  "windows-1256",
  "ISO-8859-6",
  // Baltic
  "windows-1257",
  "ISO-8859-4",
  // "ISO-8859-13", // Hidden since not in menu in IE11
  // Central European
  "windows-1250",
  "ISO-8859-2",
  // Chinese, Simplified
  "GBK",
  // Chinese, Traditional
  "Big5",
  // Cyrillic
  "windows-1251",
  "ISO-8859-5",
  "KOI8-R",
  "KOI8-U",
  "IBM866", // Not in menu in Chromium. Maybe drop this?
  // "x-mac-cyrillic", // Not in menu in IE11 or Chromium.
  // Greek
  "windows-1253",
  "ISO-8859-7",
  // Hebrew
  "windows-1255",
  "ISO-8859-8",
  // Japanese (NOT AN ENCODING NAME)
  "Japanese",
  // Korean
  "EUC-KR",
  // Thai
  "windows-874",
  // Turkish
  "windows-1254",
  // Vietnamese
  "windows-1258",
  // Hiding rare European encodings that aren't in the menu in IE11 and would
  // make the menu messy by sorting all over the place
  // "ISO-8859-3",
  // "ISO-8859-10",
  // "ISO-8859-14",
  // "ISO-8859-15",
  // "ISO-8859-16",
  // "macintosh"
]);

// Always at the start of the menu, in this order, followed by a separator.
const kPinned = ["_autodetect_all", "UTF-8", "windows-1252"];

// Pinned items are listed separately, so drop them from the general set.
for (const pinned of kPinned) {
  kEncodings.delete(pinned);
}
|
||||
|
||||
/**
 * Sort comparator for charset info objects.
 *
 * Items are ordered by their label with any parenthesized suffix removed.
 * The *other* item's encoding name is appended to each key, so items whose
 * stripped labels are identical end up reverse-sorted by encoding name.
 *
 * @param a  Object with string `label` and `value` properties.
 * @param b  Object with string `label` and `value` properties.
 * @returns  A negative number, zero, or a positive number, as expected by
 *           `Array.prototype.sort`.
 */
function CharsetComparator(a, b) {
  // Normal sorting sorts the part in parenthesis in an order that
  // happens to make the less frequently-used items first.
  const titleA = a.label.replace(/\(.*/, "") + b.value;
  const titleB = b.label.replace(/\(.*/, "") + a.value;
  // Secondarily reverse sort by encoding name to sort "windows"
  return titleA.localeCompare(titleB) || b.value.localeCompare(a.value);
}
|
||||
|
||||
// Lazily populated lists of menu item descriptors; filled on first use by
// CharsetMenu._ensureDataReady().
var gCharsetInfoCache, gPinnedInfoCache;

var CharsetMenu = {
  /**
   * Fills `parent` with one radio menuitem per known charset: the pinned
   * items first, then a separator, then the remaining items. Does nothing
   * if `parent` already has child nodes.
   *
   * @param parent
   *        The element that receives the menuitems.
   * @param deprecatedShowAccessKeys (optional)
   *        Passing a falsy value only triggers a deprecation warning; access
   *        keys are set whenever an item has one.
   */
  build(parent, deprecatedShowAccessKeys = true) {
    if (!deprecatedShowAccessKeys) {
      Deprecated.warning(
        "CharsetMenu no longer supports building a menu with no access keys.",
        "https://bugzilla.mozilla.org/show_bug.cgi?id=1088710"
      );
    }

    // Creates a radio menuitem from a descriptor produced by getCharsetInfo().
    function createDOMNode(doc, nodeInfo) {
      const node = doc.createXULElement("menuitem");
      node.setAttribute("type", "radio");
      node.setAttribute("name", nodeInfo.name + "Group");
      node.setAttribute(nodeInfo.name, nodeInfo.value);
      node.setAttribute("label", nodeInfo.label);
      if (nodeInfo.accesskey) {
        node.setAttribute("accesskey", nodeInfo.accesskey);
      }
      return node;
    }

    if (parent.hasChildNodes()) {
      // Charset menu already built
      return;
    }
    this._ensureDataReady();
    const doc = parent.ownerDocument;

    gPinnedInfoCache.forEach(charsetInfo =>
      parent.appendChild(createDOMNode(doc, charsetInfo))
    );
    parent.appendChild(doc.createXULElement("menuseparator"));
    gCharsetInfoCache.forEach(charsetInfo =>
      parent.appendChild(createDOMNode(doc, charsetInfo))
    );
  },

  /**
   * Returns the cached descriptor lists, building them first if needed.
   *
   * @returns An object with `pinnedCharsets` and `otherCharsets` arrays.
   */
  getData() {
    this._ensureDataReady();
    return {
      pinnedCharsets: gPinnedInfoCache,
      otherCharsets: gCharsetInfoCache,
    };
  },

  /**
   * Builds both descriptor caches on first call; later calls are no-ops.
   * The pinned list keeps its declared order, the other list is sorted.
   */
  _ensureDataReady() {
    if (!gCharsetInfoCache) {
      gPinnedInfoCache = this.getCharsetInfo(kPinned, false);
      gCharsetInfoCache = this.getCharsetInfo(kEncodings);
    }
  },

  /**
   * Converts an iterable of charset names into menu item descriptors.
   *
   * @param charsets
   *        Iterable of charset names.
   * @param sort (optional)
   *        Whether to sort the result with CharsetComparator. Defaults to
   *        true.
   * @returns Array of `{ label, accesskey, name: "charset", value }` objects.
   */
  getCharsetInfo(charsets, sort = true) {
    const list = Array.from(charsets, charset => ({
      label: this._getCharsetLabel(charset),
      accesskey: this._getCharsetAccessKey(charset),
      name: "charset",
      value: charset,
    }));

    if (sort) {
      list.sort(CharsetComparator);
    }
    return list;
  },

  /**
   * Looks up the localized label for `charset`.
   *
   * @returns The bundle string, or `charset` itself if the lookup throws.
   */
  _getCharsetLabel(charset) {
    if (charset == "GBK") {
      // Localization key has been revised
      charset = "gbk.bis";
    }
    try {
      return gBundle.GetStringFromName(charset);
    } catch (ex) {}
    return charset;
  },

  /**
   * Looks up the localized access key for `charset`.
   *
   * @returns The bundle string for `<charset>.key`, or "" if the lookup
   *          throws.
   */
  _getCharsetAccessKey(charset) {
    if (charset == "GBK") {
      // Localization key has been revised
      charset = "gbk.bis";
    }
    try {
      return gBundle.GetStringFromName(charset + ".key");
    } catch (ex) {}
    return "";
  },

  /**
   * For substantially similar encodings, treat two encodings as the same
   * for the purpose of the check mark.
   *
   * @param charset
   *        The encoding name to fold.
   * @param isAutodetected
   *        When truthy, the Japanese encodings Shift_JIS, EUC-JP and
   *        ISO-2022-JP are folded into the single "Japanese" item.
   * @returns The name of the menu item that represents `charset`.
   */
  foldCharset(charset, isAutodetected) {
    if (isAutodetected) {
      switch (charset) {
        case "Shift_JIS":
        case "EUC-JP":
        case "ISO-2022-JP":
          return "Japanese";
        default:
        // fall through
      }
    }
    switch (charset) {
      case "ISO-8859-8-I":
        return "windows-1255";

      case "gb18030":
        return "GBK";

      default:
        return charset;
    }
  },

  /**
   * This method is for comm-central callers only.
   *
   * Marks as checked the first menuitem under `parent` whose `charset`
   * attribute matches the folded (non-autodetected) form of `charset`.
   */
  update(parent, charset) {
    const menuitem = parent
      .getElementsByAttribute("charset", this.foldCharset(charset, false))
      .item(0);
    if (menuitem) {
      menuitem.setAttribute("checked", "true");
    }
  },
};

Object.freeze(CharsetMenu);
|
|
@ -48,6 +48,9 @@ with Files("tests/xpcshell/test_UpdateUtils*.js"):
|
|||
with Files("AsyncPrefs.jsm"):
|
||||
BUG_COMPONENT = ("Core", "Security: Process Sandboxing")
|
||||
|
||||
with Files("CharsetMenu.jsm"):
|
||||
BUG_COMPONENT = ("Firefox", "Toolbars and Customization")
|
||||
|
||||
with Files("Color.jsm"):
|
||||
BUG_COMPONENT = ("Toolkit", "Find Toolbar")
|
||||
|
||||
|
@ -157,6 +160,7 @@ EXTRA_JS_MODULES += [
|
|||
"BrowserUtils.jsm",
|
||||
"CanonicalJSON.jsm",
|
||||
"CertUtils.jsm",
|
||||
"CharsetMenu.jsm",
|
||||
"Color.jsm",
|
||||
"Console.jsm",
|
||||
"ContentDOMReference.jsm",
|
||||
|
|
Загрузка…
Ссылка в новой задаче