зеркало из https://github.com/mozilla/gecko-dev.git
Backed out 6 changesets (bug 1543077) for causing bc failures at docshell/test/browser/browser_bug1543077.js
Backed out changeset f593045cc48f (bug 1543077) Backed out changeset 25449ba8aceb (bug 1543077) Backed out changeset ccc438262e29 (bug 1543077) Backed out changeset 4573c25b1ce0 (bug 1543077) Backed out changeset 1cbaafb9373a (bug 1543077) Backed out changeset 1a0e7ced8e47 (bug 1543077) --HG-- extra : rebase_source : f04bf405303fe03776f0e70b03db076c0a41ae45
This commit is contained in:
Родитель
5519af913c
Коммит
1dd6cb6ee5
|
@ -1009,21 +1009,21 @@ name = "encoding_c"
|
|||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_glue"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"nserror 0.1.0",
|
||||
"nsstring 0.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.17"
|
||||
version = "0.8.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1299,7 +1299,6 @@ dependencies = [
|
|||
"profiler_helper 0.1.0",
|
||||
"rsdparsa_capi 0.1.0",
|
||||
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"shift_or_euc_c 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"storage 0.1.0",
|
||||
"webrender_bindings 0.1.0",
|
||||
"xpcom 0.1.0",
|
||||
|
@ -2018,7 +2017,7 @@ name = "nsstring"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2774,24 +2773,6 @@ dependencies = [
|
|||
"opaque-debug 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shift_or_euc"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shift_or_euc_c"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"shift_or_euc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "0.1.1"
|
||||
|
@ -3824,7 +3805,7 @@ dependencies = [
|
|||
"checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a"
|
||||
"checksum ena 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "25b4e5febb25f08c49f1b07dc33a182729a6b21edfb562b5aef95f78e0dbe5bb"
|
||||
"checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee"
|
||||
"checksum encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)" = "4155785c79f2f6701f185eb2e6b4caf0555ec03477cb4c70db67b465311620ed"
|
||||
"checksum encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73"
|
||||
"checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad"
|
||||
"checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3"
|
||||
"checksum euclid 0.19.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d1a7698bdda3d7444a79d33bdc96e8b518d44ea3ff101d8492a6ca1207b886ea"
|
||||
|
@ -3986,8 +3967,6 @@ dependencies = [
|
|||
"checksum serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "44dd2cfde475037451fa99b7e5df77aa3cfd1536575fa8e7a538ab36dcde49ae"
|
||||
"checksum sha-1 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "23962131a91661d643c98940b20fcaffe62d776a823247be80a48fcb8b6fce68"
|
||||
"checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d"
|
||||
"checksum shift_or_euc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"
|
||||
"checksum shift_or_euc_c 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"
|
||||
"checksum shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
|
||||
"checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84"
|
||||
"checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23"
|
||||
|
|
|
@ -6620,9 +6620,6 @@ function handleDroppedLink(event, urlOrLinks, nameOrTriggeringPrincipal, trigger
|
|||
|
||||
function BrowserSetForcedCharacterSet(aCharset) {
|
||||
if (aCharset) {
|
||||
if (aCharset == "Japanese") {
|
||||
aCharset = "Shift_JIS";
|
||||
}
|
||||
gBrowser.selectedBrowser.characterSet = aCharset;
|
||||
// Save the forced character-set
|
||||
PlacesUIUtils.setCharsetForPage(getWebNavigation().currentURI,
|
||||
|
@ -6637,8 +6634,7 @@ function BrowserCharsetReload() {
|
|||
}
|
||||
|
||||
function UpdateCurrentCharset(target) {
|
||||
let selectedCharset = CharsetMenu.foldCharset(gBrowser.selectedBrowser.characterSet,
|
||||
gBrowser.selectedBrowser.charsetAutodetected);
|
||||
let selectedCharset = CharsetMenu.foldCharset(gBrowser.selectedBrowser.characterSet);
|
||||
for (let menuItem of target.getElementsByTagName("menuitem")) {
|
||||
let isSelected = menuItem.getAttribute("charset") === selectedCharset;
|
||||
menuItem.setAttribute("checked", isSelected);
|
||||
|
|
|
@ -358,8 +358,7 @@ const CustomizableWidgets = [
|
|||
},
|
||||
updateCurrentCharset(aDocument) {
|
||||
let currentCharset = aDocument.defaultView.gBrowser.selectedBrowser.characterSet;
|
||||
let {charsetAutodetected} = aDocument.defaultView.gBrowser.selectedBrowser;
|
||||
currentCharset = CharsetMenu.foldCharset(currentCharset, charsetAutodetected);
|
||||
currentCharset = CharsetMenu.foldCharset(currentCharset);
|
||||
|
||||
let pinnedContainer = aDocument.getElementById("PanelUI-characterEncodingView-pinned");
|
||||
let charsetContainer = aDocument.getElementById("PanelUI-characterEncodingView-charsets");
|
||||
|
|
|
@ -1275,6 +1275,7 @@ nsDocShell::GatherCharsetMenuTelemetry() {
|
|||
case kCharsetFromDocTypeDefault:
|
||||
case kCharsetFromCache:
|
||||
case kCharsetFromParentFrame:
|
||||
case kCharsetFromHintPrevDoc:
|
||||
// Changing charset on an unlabeled doc.
|
||||
if (isFileURL) {
|
||||
Telemetry::AccumulateCategorical(
|
||||
|
@ -1890,26 +1891,6 @@ nsDocShell::GetMayEnableCharacterEncodingMenu(
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
nsDocShell::GetCharsetAutodetected(bool* aCharsetAutodetected) {
|
||||
*aCharsetAutodetected = false;
|
||||
if (!mContentViewer) {
|
||||
return NS_OK;
|
||||
}
|
||||
Document* doc = mContentViewer->GetDocument();
|
||||
if (!doc) {
|
||||
return NS_OK;
|
||||
}
|
||||
int32_t source = doc->GetDocumentCharacterSetSource();
|
||||
|
||||
if (source == kCharsetFromAutoDetection ||
|
||||
source == kCharsetFromUserForcedAutoDetection) {
|
||||
*aCharsetAutodetected = true;
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
nsDocShell::GetDocShellEnumerator(int32_t aItemType,
|
||||
DocShellEnumeratorDirection aDirection,
|
||||
|
@ -8384,11 +8365,11 @@ nsresult nsDocShell::SetupNewViewer(nsIContentViewer* aNewViewer) {
|
|||
|
||||
const Encoding* forceCharset = nullptr;
|
||||
const Encoding* hintCharset = nullptr;
|
||||
int32_t hintCharsetSource = kCharsetUninitialized;
|
||||
float textZoom = 1.0;
|
||||
float pageZoom = 1.0;
|
||||
float overrideDPPX = 1.0;
|
||||
bool styleDisabled = false;
|
||||
int32_t hintCharsetSource;
|
||||
float textZoom;
|
||||
float pageZoom;
|
||||
float overrideDPPX;
|
||||
bool styleDisabled;
|
||||
// |newMUDV| also serves as a flag to set the data from the above vars
|
||||
nsCOMPtr<nsIContentViewer> newCv;
|
||||
|
||||
|
@ -10200,8 +10181,6 @@ nsresult nsDocShell::DoURILoad(nsDocShellLoadState* aLoadState,
|
|||
MOZ_ASSERT(NS_SUCCEEDED(rv));
|
||||
}
|
||||
|
||||
Unused << rv; // Keep Coverity happy
|
||||
|
||||
nsCOMPtr<nsIWritablePropertyBag2> props(do_QueryInterface(channel));
|
||||
if (props) {
|
||||
// save true referrer for those who need it (e.g. xpinstall whitelisting)
|
||||
|
|
|
@ -878,11 +878,6 @@ interface nsIDocShell : nsIDocShellTreeItem
|
|||
*/
|
||||
[infallible] readonly attribute boolean mayEnableCharacterEncodingMenu;
|
||||
|
||||
/**
|
||||
* Indicates that the character encoding was autodetected.
|
||||
*/
|
||||
[infallible] readonly attribute boolean charsetAutodetected;
|
||||
|
||||
attribute nsIEditor editor;
|
||||
readonly attribute boolean editable; /* this docShell is editable */
|
||||
readonly attribute boolean hasEditingSession; /* this docShell has an editing session */
|
||||
|
|
|
@ -44,14 +44,6 @@ support-files =
|
|||
file_bug1415918_beforeunload_iframe_2.html
|
||||
file_bug1415918_beforeunload_iframe.html
|
||||
file_bug1415918_beforeunload.html
|
||||
file_bug1543077-1-child.html
|
||||
file_bug1543077-1.html
|
||||
file_bug1543077-2-child.html
|
||||
file_bug1543077-2.html
|
||||
file_bug1543077-3-child.html
|
||||
file_bug1543077-3.html
|
||||
file_bug1543077-4-child.html
|
||||
file_bug1543077-4.html
|
||||
file_multiple_pushState.html
|
||||
print_postdata.sjs
|
||||
test-form_sjis.html
|
||||
|
@ -68,10 +60,6 @@ support-files =
|
|||
onpageshow_message.html
|
||||
file_cross_process_csp_inheritance.html
|
||||
|
||||
[browser_bug1543077-1.js]
|
||||
[browser_bug1543077-2.js]
|
||||
[browser_bug1543077-3.js]
|
||||
[browser_bug1543077-4.js]
|
||||
[browser_bug1206879.js]
|
||||
[browser_bug1309900_crossProcessHistoryNavigation.js]
|
||||
[browser_bug1328501.js]
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(rootDir + "file_bug1543077-1.html", afterOpen, "Japanese", afterChangeCharset);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u00A4"), 131, "Parent doc should be windows-1252 initially");
|
||||
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u00A4"), 87, "Child doc should be windows-1252 initially");
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u3042"), 131, "Parent doc should decode as EUC-JP subsequently");
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 87, "Child doc should decode as EUC-JP subsequently");
|
||||
|
||||
is(content.document.characterSet, "EUC-JP", "Parent doc should report EUC-JP subsequently");
|
||||
is(content.frames[0].document.characterSet, "EUC-JP", "Child doc should report EUC-JP subsequently");
|
||||
}
|
|
@ -1,18 +0,0 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(rootDir + "file_bug1543077-2.html", afterOpen, "Japanese", afterChangeCharset);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u201A"), 134, "Parent doc should be windows-1252 initially");
|
||||
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u201A"), 90, "Child doc should be windows-1252 initially");
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u3042"), 134, "Parent doc should decode as Shift_JIS subsequently");
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 90, "Child doc should decode as Shift_JIS subsequently");
|
||||
|
||||
is(content.document.characterSet, "Shift_JIS", "Parent doc should report Shift_JIS subsequently");
|
||||
is(content.frames[0].document.characterSet, "Shift_JIS", "Child doc should report Shift_JIS subsequently");
|
||||
}
|
|
@ -1,18 +0,0 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(rootDir + "file_bug1543077-3.html", afterOpen, "Japanese", afterChangeCharset);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u001B"), 136, "Parent doc should be windows-1252 initially");
|
||||
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u001B"), 92, "Child doc should be windows-1252 initially");
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u3042"), 136, "Parent doc should decode as ISO-2022-JP subsequently");
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 92, "Child doc should decode as ISO-2022-JP subsequently");
|
||||
|
||||
is(content.document.characterSet, "ISO-2022-JP", "Parent doc should report ISO-2022-JP subsequently");
|
||||
is(content.frames[0].document.characterSet, "ISO-2022-JP", "Child doc should report ISO-2022-JP subsequently");
|
||||
}
|
|
@ -1,18 +0,0 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(rootDir + "file_bug1543077-4.html", afterOpen, "Japanese", afterChangeCharset);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u00A4"), 131, "Parent doc should be windows-1252 initially");
|
||||
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u201A"), 90, "Child doc should be windows-1252 initially");
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u3042"), 131, "Parent doc should decode as EUC-JP subsequently");
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 90, "Child doc should decode as Shift_JIS subsequently");
|
||||
|
||||
is(content.document.characterSet, "EUC-JP", "Parent doc should report EUC-JP subsequently");
|
||||
is(content.frames[0].document.characterSet, "Shift_JIS", "Child doc should report Shift_JIS subsequently");
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as EUC-JP: <20><></p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as EUC-JP: <20><></p>
|
||||
|
||||
<iframe src="file_bug1543077-1-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as Shift_JIS: <20><></p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as Shift_JIS: <20><></p>
|
||||
|
||||
<iframe src="file_bug1543077-2-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as ISO-2022-JP: $B$"(B</p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as ISO-2022-JP: $B$"(B</p>
|
||||
|
||||
<iframe src="file_bug1543077-3-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as Shift_JIS: <20><></p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as EUC-JP: <20><></p>
|
||||
|
||||
<iframe src="file_bug1543077-4-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -93,12 +93,6 @@ interface nsIBrowser : nsISupports
|
|||
*/
|
||||
attribute boolean mayEnableCharacterEncodingMenu;
|
||||
|
||||
/**
|
||||
* Whether or not the character encoding was detected by analyzing
|
||||
* content (as opposed to reading a protocol label).
|
||||
*/
|
||||
attribute boolean charsetAutodetected;
|
||||
|
||||
/**
|
||||
* Called by Gecko to update the browser when its state changes.
|
||||
*
|
||||
|
|
|
@ -3504,8 +3504,6 @@ NS_IMETHODIMP BrowserChild::OnStateChange(nsIWebProgress* aWebProgress,
|
|||
stateChangeData->isNavigating() = docShell->GetIsNavigating();
|
||||
stateChangeData->mayEnableCharacterEncodingMenu() =
|
||||
docShell->GetMayEnableCharacterEncodingMenu();
|
||||
stateChangeData->charsetAutodetected() =
|
||||
docShell->GetCharsetAutodetected();
|
||||
|
||||
if (document && aStateFlags & nsIWebProgressListener::STATE_STOP) {
|
||||
document->GetContentType(stateChangeData->contentType());
|
||||
|
|
|
@ -2364,8 +2364,6 @@ mozilla::ipc::IPCResult BrowserParent::RecvOnStateChange(
|
|||
Unused << browser->SetIsNavigating(aStateChangeData->isNavigating());
|
||||
Unused << browser->SetMayEnableCharacterEncodingMenu(
|
||||
aStateChangeData->mayEnableCharacterEncodingMenu());
|
||||
Unused << browser->SetCharsetAutodetected(
|
||||
aStateChangeData->charsetAutodetected());
|
||||
Unused << browser->UpdateForStateChange(aStateChangeData->charset(),
|
||||
aStateChangeData->documentURI(),
|
||||
aStateChangeData->contentType());
|
||||
|
|
|
@ -122,7 +122,6 @@ struct WebProgressStateChangeData
|
|||
{
|
||||
bool isNavigating;
|
||||
bool mayEnableCharacterEncodingMenu;
|
||||
bool charsetAutodetected;
|
||||
|
||||
// The following fields are only set when the aStateFlags param passed with
|
||||
// this struct is |nsIWebProgress.STATE_STOP|.
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
DIRS += ['src']
|
||||
TEST_DIRS += ['tests']
|
||||
|
||||
with Files('**'):
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "CharDistribution.h"
|
||||
|
||||
#include "JISFreq.tab"
|
||||
#include "mozilla/ArrayUtils.h"
|
||||
|
||||
#define SURE_YES 0.99f
|
||||
#define SURE_NO 0.01f
|
||||
|
||||
// return confidence based on received data
|
||||
float CharDistributionAnalysis::GetConfidence(void) {
|
||||
// if we didn't receive any character in our consideration range, or the
|
||||
// number of frequent characters is below the minimum threshold, return
|
||||
// negative answer
|
||||
if (mTotalChars <= 0 || mFreqChars <= mDataThreshold) return SURE_NO;
|
||||
|
||||
if (mTotalChars != mFreqChars) {
|
||||
float r =
|
||||
mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
|
||||
|
||||
if (r < SURE_YES) return r;
|
||||
}
|
||||
// normalize confidence, (we don't want to be 100% sure)
|
||||
return SURE_YES;
|
||||
}
|
||||
|
||||
SJISDistributionAnalysis::SJISDistributionAnalysis() {
|
||||
mCharToFreqOrder = JISCharToFreqOrder;
|
||||
mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
|
||||
mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
|
||||
}
|
||||
|
||||
EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() {
|
||||
mCharToFreqOrder = JISCharToFreqOrder;
|
||||
mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
|
||||
mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
|
||||
}
|
|
@ -0,0 +1,201 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef CharDistribution_h__
|
||||
#define CharDistribution_h__
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
#define ENOUGH_DATA_THRESHOLD 1024
|
||||
|
||||
class CharDistributionAnalysis {
|
||||
public:
|
||||
CharDistributionAnalysis() { Reset(); }
|
||||
|
||||
// feed a block of data and do distribution analysis
|
||||
void HandleData(const char* aBuf, uint32_t aLen) {}
|
||||
|
||||
// Feed a character with known length
|
||||
void HandleOneChar(const char* aStr, uint32_t aCharLen) {
|
||||
int32_t order;
|
||||
|
||||
// we only care about 2-byte characters in our distribution analysis
|
||||
order = (aCharLen == 2) ? GetOrder(aStr) : -1;
|
||||
|
||||
if (order >= 0) {
|
||||
mTotalChars++;
|
||||
// order is valid
|
||||
if ((uint32_t)order < mTableSize) {
|
||||
if (512 > mCharToFreqOrder[order]) mFreqChars++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// return confidence based on existing data
|
||||
float GetConfidence(void);
|
||||
|
||||
// Reset analyser, clear any state
|
||||
void Reset() {
|
||||
mDone = false;
|
||||
mTotalChars = 0;
|
||||
mFreqChars = 0;
|
||||
mDataThreshold = 0;
|
||||
}
|
||||
|
||||
// It is not necessary to receive all data to draw conclusion. For charset
|
||||
// detection,
|
||||
// certain amount of data is enough
|
||||
bool GotEnoughData() { return mTotalChars > ENOUGH_DATA_THRESHOLD; }
|
||||
|
||||
protected:
|
||||
// we do not handle a character based on its original encoding string, but
|
||||
// convert this encoding string to a number, here called order.
|
||||
// This allows multiple encodings of a language to share one frequency table
|
||||
virtual int32_t GetOrder(const char* str) { return -1; }
|
||||
|
||||
// If this flag is set to true, detection is done and conclusion has been made
|
||||
bool mDone;
|
||||
|
||||
// The number of characters whose frequency order is less than 512
|
||||
uint32_t mFreqChars;
|
||||
|
||||
// Total characters encountered.
|
||||
uint32_t mTotalChars;
|
||||
|
||||
// Number of hi-byte characters needed to trigger detection
|
||||
uint32_t mDataThreshold;
|
||||
|
||||
// Mapping table to get frequency order from char order (get from GetOrder())
|
||||
const int16_t* mCharToFreqOrder;
|
||||
|
||||
// Size of above table
|
||||
uint32_t mTableSize;
|
||||
|
||||
// This is a constant value that varies from language to language; it is used in
|
||||
// calculating confidence. See my paper for further detail.
|
||||
float mTypicalDistributionRatio;
|
||||
};
|
||||
|
||||
class EUCTWDistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
EUCTWDistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for euc-TW encoding, we are interested
|
||||
// first byte range: 0xc4 -- 0xfe
|
||||
// second byte range: 0xa1 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
if ((unsigned char)*str >= (unsigned char)0xc4)
|
||||
return 94 * ((unsigned char)str[0] - (unsigned char)0xc4) +
|
||||
(unsigned char)str[1] - (unsigned char)0xa1;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
class EUCKRDistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
EUCKRDistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for euc-KR encoding, we are interested
|
||||
// first byte range: 0xb0 -- 0xfe
|
||||
// second byte range: 0xa1 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
if ((unsigned char)*str >= (unsigned char)0xb0)
|
||||
return 94 * ((unsigned char)str[0] - (unsigned char)0xb0) +
|
||||
(unsigned char)str[1] - (unsigned char)0xa1;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
class GB2312DistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
GB2312DistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for GB2312 encoding, we are interested
|
||||
// first byte range: 0xb0 -- 0xfe
|
||||
// second byte range: 0xa1 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
if ((unsigned char)*str >= (unsigned char)0xb0 &&
|
||||
(unsigned char)str[1] >= (unsigned char)0xa1)
|
||||
return 94 * ((unsigned char)str[0] - (unsigned char)0xb0) +
|
||||
(unsigned char)str[1] - (unsigned char)0xa1;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
class Big5DistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
Big5DistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for big5 encoding, we are interested
|
||||
// first byte range: 0xa4 -- 0xfe
|
||||
// second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
if ((unsigned char)*str >= (unsigned char)0xa4)
|
||||
if ((unsigned char)str[1] >= (unsigned char)0xa1)
|
||||
return 157 * ((unsigned char)str[0] - (unsigned char)0xa4) +
|
||||
(unsigned char)str[1] - (unsigned char)0xa1 + 63;
|
||||
else
|
||||
return 157 * ((unsigned char)str[0] - (unsigned char)0xa4) +
|
||||
(unsigned char)str[1] - (unsigned char)0x40;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
class SJISDistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
SJISDistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for sjis encoding, we are interested
|
||||
// first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
|
||||
// second byte range: 0x40 -- 0x7e, 0x81 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
int32_t order;
|
||||
if ((unsigned char)*str >= (unsigned char)0x81 &&
|
||||
(unsigned char)*str <= (unsigned char)0x9f)
|
||||
order = 188 * ((unsigned char)str[0] - (unsigned char)0x81);
|
||||
else if ((unsigned char)*str >= (unsigned char)0xe0 &&
|
||||
(unsigned char)*str <= (unsigned char)0xef)
|
||||
order = 188 * ((unsigned char)str[0] - (unsigned char)0xe0 + 31);
|
||||
else
|
||||
return -1;
|
||||
order += (unsigned char)*(str + 1) - 0x40;
|
||||
if ((unsigned char)str[1] > (unsigned char)0x7f) order--;
|
||||
return order;
|
||||
}
|
||||
};
|
||||
|
||||
class EUCJPDistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
EUCJPDistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for euc-JP encoding, we are interested
|
||||
// first byte range: 0xa0 -- 0xfe
|
||||
// second byte range: 0xa1 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
if ((unsigned char)*str >= (unsigned char)0xa0)
|
||||
return 94 * ((unsigned char)str[0] - (unsigned char)0xa1) +
|
||||
(unsigned char)str[1] - (unsigned char)0xa1;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // CharDistribution_h__
|
|
@ -0,0 +1,554 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// Sampled from about 20M of text materials, including literature and computer technology
|
||||
|
||||
// Japanese frequency table, applied to both S-JIS and EUC-JP
|
||||
//They are sorted in order.
|
||||
|
||||
/******************************************************************************
|
||||
* 128 --> 0.77094
|
||||
* 256 --> 0.85710
|
||||
* 512 --> 0.92635
|
||||
* 1024 --> 0.97130
|
||||
* 2048 --> 0.99431
|
||||
*
|
||||
* Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
|
||||
* Random Distribution Ratio = 512 / (2965+62+83+86-512) = 0.191
|
||||
*
|
||||
* Typical Distribution Ratio, 25% of IDR
|
||||
*****************************************************************************/
|
||||
|
||||
#define JIS_TYPICAL_DISTRIBUTION_RATIO (float) 3.0
|
||||
|
||||
// Char to FreqOrder table
|
||||
static const int16_t JISCharToFreqOrder[] =
|
||||
{
|
||||
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, // 16
|
||||
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, // 32
|
||||
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, // 48
|
||||
2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, // 64
|
||||
2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, // 80
|
||||
5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, // 96
|
||||
1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, // 112
|
||||
5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, // 128
|
||||
5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, // 144
|
||||
5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, // 160
|
||||
5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, // 176
|
||||
5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, // 192
|
||||
5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, // 208
|
||||
1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, // 224
|
||||
1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, // 240
|
||||
1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, // 256
|
||||
2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, // 272
|
||||
3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, // 288
|
||||
3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, // 304
|
||||
4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, // 320
|
||||
12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, // 336
|
||||
1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, // 352
|
||||
109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, // 368
|
||||
5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, // 384
|
||||
271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, // 400
|
||||
32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, // 416
|
||||
43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, // 432
|
||||
280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, // 448
|
||||
54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, // 464
|
||||
5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, // 480
|
||||
5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, // 496
|
||||
5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, // 512
|
||||
4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, // 528
|
||||
5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, // 544
|
||||
5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, // 560
|
||||
5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, // 576
|
||||
5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, // 592
|
||||
5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, // 608
|
||||
5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, // 624
|
||||
5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, // 640
|
||||
5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, // 656
|
||||
5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, // 672
|
||||
3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, // 688
|
||||
5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, // 704
|
||||
5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, // 720
|
||||
5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, // 736
|
||||
5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, // 752
|
||||
5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, // 768
|
||||
5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, // 784
|
||||
5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, // 800
|
||||
5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, // 816
|
||||
5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, // 832
|
||||
5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, // 848
|
||||
5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, // 864
|
||||
5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, // 880
|
||||
5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, // 896
|
||||
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, // 912
|
||||
5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, // 928
|
||||
5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, // 944
|
||||
5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, // 960
|
||||
5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, // 976
|
||||
5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, // 992
|
||||
5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, // 1008
|
||||
5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, // 1024
|
||||
5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, // 1040
|
||||
5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, // 1056
|
||||
5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, // 1072
|
||||
5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, // 1088
|
||||
5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, // 1104
|
||||
5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, // 1120
|
||||
5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, // 1136
|
||||
5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, // 1152
|
||||
5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, // 1168
|
||||
5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, // 1184
|
||||
5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, // 1200
|
||||
5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, // 1216
|
||||
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, // 1232
|
||||
5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, // 1248
|
||||
5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, // 1264
|
||||
5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, // 1280
|
||||
5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, // 1296
|
||||
6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, // 1312
|
||||
6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, // 1328
|
||||
6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, // 1344
|
||||
6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, // 1360
|
||||
6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, // 1376
|
||||
6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, // 1392
|
||||
6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, // 1408
|
||||
6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, // 1424
|
||||
4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, // 1440
|
||||
854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, // 1456
|
||||
665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, // 1472
|
||||
1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, // 1488
|
||||
1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, // 1504
|
||||
896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, // 1520
|
||||
3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, // 1536
|
||||
3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, // 1552
|
||||
804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, // 1568
|
||||
3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, // 1584
|
||||
3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, // 1600
|
||||
586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, // 1616
|
||||
2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, // 1632
|
||||
277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, // 1648
|
||||
3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, // 1664
|
||||
1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, // 1680
|
||||
380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, // 1696
|
||||
1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, // 1712
|
||||
850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, // 1728
|
||||
2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, // 1744
|
||||
2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, // 1760
|
||||
2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, // 1776
|
||||
2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, // 1792
|
||||
1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, // 1808
|
||||
1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, // 1824
|
||||
1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, // 1840
|
||||
1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, // 1856
|
||||
2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, // 1872
|
||||
1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, // 1888
|
||||
2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, // 1904
|
||||
1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, // 1920
|
||||
1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, // 1936
|
||||
1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, // 1952
|
||||
1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, // 1968
|
||||
1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, // 1984
|
||||
1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, // 2000
|
||||
606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, // 2016
|
||||
684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, // 2032
|
||||
1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, // 2048
|
||||
2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, // 2064
|
||||
2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, // 2080
|
||||
2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, // 2096
|
||||
3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, // 2112
|
||||
3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, // 2128
|
||||
884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, // 2144
|
||||
3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, // 2160
|
||||
1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, // 2176
|
||||
861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, // 2192
|
||||
2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, // 2208
|
||||
1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, // 2224
|
||||
576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, // 2240
|
||||
3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, // 2256
|
||||
4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, // 2272
|
||||
2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, // 2288
|
||||
1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, // 2304
|
||||
2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, // 2320
|
||||
1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, // 2336
|
||||
385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, // 2352
|
||||
178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, // 2368
|
||||
1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, // 2384
|
||||
2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, // 2400
|
||||
2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, // 2416
|
||||
2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, // 2432
|
||||
3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, // 2448
|
||||
1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, // 2464
|
||||
2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, // 2480
|
||||
359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, // 2496
|
||||
837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, // 2512
|
||||
855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, // 2528
|
||||
1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, // 2544
|
||||
2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, // 2560
|
||||
633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, // 2576
|
||||
1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, // 2592
|
||||
1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, // 2608
|
||||
353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, // 2624
|
||||
1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, // 2640
|
||||
1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, // 2656
|
||||
1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, // 2672
|
||||
764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, // 2688
|
||||
2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, // 2704
|
||||
278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, // 2720
|
||||
2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, // 2736
|
||||
3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, // 2752
|
||||
2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, // 2768
|
||||
1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, // 2784
|
||||
6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, // 2800
|
||||
1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, // 2816
|
||||
2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, // 2832
|
||||
1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, // 2848
|
||||
470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, // 2864
|
||||
72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, // 2880
|
||||
3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, // 2896
|
||||
3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, // 2912
|
||||
1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, // 2928
|
||||
1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, // 2944
|
||||
1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, // 2960
|
||||
1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, // 2976
|
||||
123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, // 2992
|
||||
913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, // 3008
|
||||
2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, // 3024
|
||||
900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, // 3040
|
||||
3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, // 3056
|
||||
2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, // 3072
|
||||
423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, // 3088
|
||||
1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, // 3104
|
||||
2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, // 3120
|
||||
220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, // 3136
|
||||
1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, // 3152
|
||||
745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, // 3168
|
||||
4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, // 3184
|
||||
2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, // 3200
|
||||
1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, // 3216
|
||||
666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, // 3232
|
||||
1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, // 3248
|
||||
2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, // 3264
|
||||
376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, // 3280
|
||||
6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, // 3296
|
||||
1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, // 3312
|
||||
1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, // 3328
|
||||
2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, // 3344
|
||||
3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, // 3360
|
||||
914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, // 3376
|
||||
3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, // 3392
|
||||
1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, // 3408
|
||||
674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, // 3424
|
||||
1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, // 3440
|
||||
199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, // 3456
|
||||
3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, // 3472
|
||||
370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, // 3488
|
||||
2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, // 3504
|
||||
414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, // 3520
|
||||
4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, // 3536
|
||||
2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, // 3552
|
||||
1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, // 3568
|
||||
1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, // 3584
|
||||
1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, // 3600
|
||||
166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, // 3616
|
||||
1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, // 3632
|
||||
3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, // 3648
|
||||
1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, // 3664
|
||||
3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, // 3680
|
||||
264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, // 3696
|
||||
543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, // 3712
|
||||
983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, // 3728
|
||||
2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, // 3744
|
||||
1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, // 3760
|
||||
867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, // 3776
|
||||
1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, // 3792
|
||||
894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, // 3808
|
||||
1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, // 3824
|
||||
530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, // 3840
|
||||
839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, // 3856
|
||||
480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, // 3872
|
||||
1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, // 3888
|
||||
1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, // 3904
|
||||
2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, // 3920
|
||||
4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, // 3936
|
||||
227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, // 3952
|
||||
1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, // 3968
|
||||
328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, // 3984
|
||||
1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, // 4000
|
||||
3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, // 4016
|
||||
1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, // 4032
|
||||
2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, // 4048
|
||||
2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, // 4064
|
||||
1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, // 4080
|
||||
1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, // 4096
|
||||
2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, // 4112
|
||||
455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, // 4128
|
||||
2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, // 4144
|
||||
1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, // 4160
|
||||
1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, // 4176
|
||||
1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, // 4192
|
||||
1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, // 4208
|
||||
3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, // 4224
|
||||
2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, // 4240
|
||||
2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, // 4256
|
||||
575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, // 4272
|
||||
3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, // 4288
|
||||
3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, // 4304
|
||||
1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, // 4320
|
||||
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, // 4336
|
||||
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, // 4352
|
||||
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, // 4368 //last 512
|
||||
|
||||
/***************************************************************************************
|
||||
*Everything below is of no interest for detection purpose *
|
||||
***************************************************************************************
|
||||
|
||||
2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, // 4384
|
||||
6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, // 4400
|
||||
6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, // 4416
|
||||
6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, // 4432
|
||||
6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, // 4448
|
||||
4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, // 4464
|
||||
4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, // 4480
|
||||
3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, // 4496
|
||||
3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, // 4512
|
||||
4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, // 4528
|
||||
3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, // 4544
|
||||
6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, // 4560
|
||||
4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, // 4576
|
||||
6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, // 4592
|
||||
6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, // 4608
|
||||
6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, // 4624
|
||||
6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, // 4640
|
||||
6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, // 4656
|
||||
6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, // 4672
|
||||
3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, // 4688
|
||||
3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, // 4704
|
||||
6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, // 4720
|
||||
2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, // 4736
|
||||
4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, // 4752
|
||||
4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, // 4768
|
||||
4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, // 4784
|
||||
6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, // 4800
|
||||
3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, // 4816
|
||||
4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, // 4832
|
||||
4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, // 4848
|
||||
6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, // 4864
|
||||
4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, // 4880
|
||||
6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, // 4896
|
||||
3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, // 4912
|
||||
2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, // 4928
|
||||
4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, // 4944
|
||||
2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, // 4960
|
||||
6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, // 4976
|
||||
4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, // 4992
|
||||
6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, // 5008
|
||||
6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, // 5024
|
||||
6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, // 5040
|
||||
4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, // 5056
|
||||
6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, // 5072
|
||||
2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, // 5088
|
||||
6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, // 5104
|
||||
4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, // 5120
|
||||
6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, // 5136
|
||||
4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, // 5152
|
||||
4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, // 5168
|
||||
6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, // 5184
|
||||
6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, // 5200
|
||||
6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, // 5216
|
||||
3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, // 5232
|
||||
1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, // 5248
|
||||
3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, // 5264
|
||||
3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, // 5280
|
||||
4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, // 5296
|
||||
6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, // 5312
|
||||
3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, // 5328
|
||||
6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, // 5344
|
||||
3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, // 5360
|
||||
3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, // 5376
|
||||
2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, // 5392
|
||||
6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, // 5408
|
||||
6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, // 5424
|
||||
3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, // 5440
|
||||
6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, // 5456
|
||||
3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, // 5472
|
||||
6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, // 5488
|
||||
6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, // 5504
|
||||
6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, // 5520
|
||||
4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, // 5536
|
||||
6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, // 5552
|
||||
4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, // 5568
|
||||
3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, // 5584
|
||||
3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, // 5600
|
||||
6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, // 5616
|
||||
6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, // 5632
|
||||
4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, // 5648
|
||||
6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, // 5664
|
||||
6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, // 5680
|
||||
6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, // 5696
|
||||
6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, // 5712
|
||||
6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, // 5728
|
||||
6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, // 5744
|
||||
4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, // 5760
|
||||
4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, // 5776
|
||||
3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, // 5792
|
||||
6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, // 5808
|
||||
4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, // 5824
|
||||
2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, // 5840
|
||||
6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, // 5856
|
||||
6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, // 5872
|
||||
4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, // 5888
|
||||
2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, // 5904
|
||||
4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, // 5920
|
||||
2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, // 5936
|
||||
4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, // 5952
|
||||
4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, // 5968
|
||||
4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, // 5984
|
||||
6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, // 6000
|
||||
3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, // 6016
|
||||
6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, // 6032
|
||||
3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, // 6048
|
||||
6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, // 6064
|
||||
2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, // 6080
|
||||
3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, // 6096
|
||||
7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, // 6112
|
||||
2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, // 6128
|
||||
3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, // 6144
|
||||
3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, // 6160
|
||||
3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, // 6176
|
||||
3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, // 6192
|
||||
7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, // 6208
|
||||
7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, // 6224
|
||||
7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, // 6240
|
||||
7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, // 6256
|
||||
7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, // 6272
|
||||
4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, // 6288
|
||||
3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, // 6304
|
||||
3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, // 6320
|
||||
4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, // 6336
|
||||
3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, // 6352
|
||||
3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, // 6368
|
||||
7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, // 6384
|
||||
4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, // 6400
|
||||
7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, // 6416
|
||||
7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, // 6432
|
||||
7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, // 6448
|
||||
7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, // 6464
|
||||
7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, // 6480
|
||||
4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, // 6496
|
||||
4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, // 6512
|
||||
7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, // 6528
|
||||
3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, // 6544
|
||||
4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, // 6560
|
||||
7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, // 6576
|
||||
7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, // 6592
|
||||
4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, // 6608
|
||||
3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, // 6624
|
||||
3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, // 6640
|
||||
7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, // 6656
|
||||
4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, // 6672
|
||||
4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, // 6688
|
||||
4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, // 6704
|
||||
4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, // 6720
|
||||
4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, // 6736
|
||||
4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, // 6752
|
||||
7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, // 6768
|
||||
7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, // 6784
|
||||
7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, // 6800
|
||||
7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, // 6816
|
||||
7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, // 6832
|
||||
2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, // 6848
|
||||
3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, // 6864
|
||||
7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, // 6880
|
||||
7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, // 6896
|
||||
3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, // 6912
|
||||
4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, // 6928
|
||||
3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, // 6944
|
||||
3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, // 6960
|
||||
2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, // 6976
|
||||
7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, // 6992
|
||||
7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, // 7008
|
||||
4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, // 7024
|
||||
3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, // 7040
|
||||
3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, // 7056
|
||||
7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, // 7072
|
||||
7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, // 7088
|
||||
7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, // 7104
|
||||
4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, // 7120
|
||||
7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, // 7136
|
||||
2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, // 7152
|
||||
3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, // 7168
|
||||
4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, // 7184
|
||||
7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, // 7200
|
||||
4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, // 7216
|
||||
4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, // 7232
|
||||
7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, // 7248
|
||||
7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, // 7264
|
||||
5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, // 7280
|
||||
7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, // 7296
|
||||
7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, // 7312
|
||||
7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, // 7328
|
||||
7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, // 7344
|
||||
7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, // 7360
|
||||
5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, // 7376
|
||||
5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, // 7392
|
||||
7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, // 7408
|
||||
3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, // 7424
|
||||
7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, // 7440
|
||||
7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, // 7456
|
||||
3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, // 7472
|
||||
7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, // 7488
|
||||
7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, // 7504
|
||||
1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, // 7520
|
||||
3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, // 7536
|
||||
4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, // 7552
|
||||
2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, // 7568
|
||||
3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, // 7584
|
||||
2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, // 7600
|
||||
5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, // 7616
|
||||
4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, // 7632
|
||||
4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, // 7648
|
||||
5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, // 7664
|
||||
7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, // 7680
|
||||
7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, // 7696
|
||||
7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, // 7712
|
||||
7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, // 7728
|
||||
3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, // 7744
|
||||
7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, // 7760
|
||||
3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, // 7776
|
||||
7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, // 7792
|
||||
4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, // 7808
|
||||
7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, // 7824
|
||||
7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, // 7840
|
||||
7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, // 7856
|
||||
7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, // 7872
|
||||
7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, // 7888
|
||||
7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, // 7904
|
||||
7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, // 7920
|
||||
7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, // 7936
|
||||
7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, // 7952
|
||||
7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, // 7968
|
||||
7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, // 7984
|
||||
7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, // 8000
|
||||
8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, // 8016
|
||||
8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, // 8032
|
||||
8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, // 8048
|
||||
8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, // 8064
|
||||
8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, // 8080
|
||||
8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, // 8096
|
||||
8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, // 8112
|
||||
8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, // 8128
|
||||
8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, // 8144
|
||||
8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, // 8160
|
||||
8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, // 8176
|
||||
8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, // 8192
|
||||
8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, // 8208
|
||||
8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, // 8224
|
||||
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, // 8240
|
||||
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, // 8256
|
||||
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, // 8272
|
||||
****************************************************************************************/
|
||||
|
||||
};
|
||||
|
|
@ -0,0 +1,600 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nscore.h"
|
||||
#include "JpCntx.h"
|
||||
|
||||
// This is hiragana 2-char sequence table, the number in each cell represents
|
||||
// its frequency category
|
||||
const uint8_t jp2CharContext[83][83] = {
|
||||
{
|
||||
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
},
|
||||
{
|
||||
2, 4, 0, 4, 0, 3, 0, 4, 0, 3, 4, 4, 4, 2, 4, 3, 3, 4, 3, 2, 3,
|
||||
3, 4, 2, 3, 3, 3, 2, 4, 1, 4, 3, 3, 1, 5, 4, 3, 4, 3, 4, 3, 5,
|
||||
3, 0, 3, 5, 4, 2, 0, 3, 1, 0, 3, 3, 0, 3, 3, 0, 1, 1, 0, 4, 3,
|
||||
0, 3, 3, 0, 4, 0, 2, 0, 3, 5, 5, 5, 5, 4, 0, 4, 1, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 4, 4, 3, 5, 3, 5, 1, 5, 3, 4,
|
||||
3, 4, 4, 3, 4, 3, 3, 4, 3, 5, 4, 4, 3, 5, 5, 3, 5, 5, 5, 3, 5,
|
||||
5, 3, 4, 5, 5, 3, 1, 3, 2, 0, 3, 4, 0, 4, 2, 0, 4, 2, 1, 5, 3,
|
||||
2, 3, 5, 0, 4, 0, 2, 0, 5, 4, 4, 5, 4, 5, 0, 4, 0, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 5, 4, 3, 3, 3, 3, 4, 3, 5, 4, 4,
|
||||
3, 5, 4, 4, 3, 4, 3, 4, 4, 4, 4, 5, 3, 4, 4, 3, 4, 5, 5, 4, 5,
|
||||
5, 1, 4, 5, 4, 3, 0, 3, 3, 1, 3, 3, 0, 4, 4, 0, 3, 3, 1, 5, 3,
|
||||
3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 0, 4, 1, 1, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 3, 0, 3, 0, 4, 0, 3, 4, 4, 3, 2, 2, 1, 2, 1, 3, 1, 3,
|
||||
3, 3, 3, 3, 4, 3, 1, 3, 3, 5, 3, 3, 0, 4, 3, 0, 5, 4, 3, 3, 5,
|
||||
4, 4, 3, 4, 4, 5, 0, 1, 2, 0, 1, 2, 0, 2, 2, 0, 1, 0, 0, 5, 2,
|
||||
2, 1, 4, 0, 3, 0, 1, 0, 4, 4, 3, 5, 4, 3, 0, 2, 1, 0, 4, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 5, 0, 4, 0, 2, 1, 4, 4, 2, 4, 1, 4, 2, 4, 2, 4, 3, 3,
|
||||
3, 4, 3, 3, 3, 3, 1, 4, 2, 3, 3, 3, 1, 4, 4, 1, 1, 1, 4, 3, 3,
|
||||
2, 0, 2, 4, 3, 2, 0, 3, 3, 0, 3, 1, 1, 0, 0, 0, 3, 3, 0, 4, 2,
|
||||
2, 3, 4, 0, 4, 0, 3, 0, 4, 4, 5, 3, 4, 4, 0, 3, 0, 0, 1, 4,
|
||||
},
|
||||
{
|
||||
1, 4, 0, 4, 0, 4, 0, 4, 0, 3, 5, 4, 4, 3, 4, 3, 5, 4, 3, 3, 4,
|
||||
3, 5, 4, 4, 4, 4, 3, 4, 2, 4, 3, 3, 1, 5, 4, 3, 2, 4, 5, 4, 5,
|
||||
5, 4, 4, 5, 4, 4, 0, 3, 2, 2, 3, 3, 0, 4, 3, 1, 3, 2, 1, 4, 3,
|
||||
3, 4, 5, 0, 3, 0, 2, 0, 4, 5, 5, 4, 5, 4, 0, 4, 0, 0, 5, 4,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 3, 4, 0, 4, 4, 4,
|
||||
3, 4, 3, 4, 3, 3, 1, 4, 2, 4, 3, 4, 0, 5, 4, 1, 4, 5, 4, 4, 5,
|
||||
3, 2, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 2, 3, 2, 0, 4, 3, 3, 4, 3,
|
||||
3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 4, 3, 0, 4, 1, 0, 1, 3,
|
||||
},
|
||||
{
|
||||
0, 3, 1, 4, 0, 3, 0, 2, 0, 3, 4, 4, 3, 1, 4, 2, 3, 3, 4, 3, 4,
|
||||
3, 4, 3, 4, 4, 3, 2, 3, 1, 5, 4, 4, 1, 4, 4, 3, 5, 4, 4, 3, 5,
|
||||
5, 4, 3, 4, 4, 3, 1, 2, 3, 1, 2, 2, 0, 3, 2, 0, 3, 1, 0, 5, 3,
|
||||
3, 3, 4, 3, 3, 3, 3, 4, 4, 4, 4, 5, 4, 2, 0, 3, 3, 2, 4, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 1, 0, 1, 0, 0, 3, 2, 0, 0, 2, 0, 1, 0, 2, 1, 3,
|
||||
3, 3, 1, 2, 3, 1, 0, 1, 0, 4, 2, 1, 1, 3, 3, 0, 4, 3, 3, 1, 4,
|
||||
3, 3, 0, 3, 3, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 4, 1,
|
||||
0, 2, 3, 2, 2, 2, 1, 3, 3, 3, 4, 4, 3, 2, 0, 3, 1, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 3, 0, 3, 0, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 3, 4,
|
||||
2, 4, 3, 4, 3, 3, 2, 4, 3, 4, 5, 4, 1, 4, 5, 3, 5, 4, 5, 3, 5,
|
||||
4, 0, 3, 5, 5, 3, 1, 3, 3, 2, 2, 3, 0, 3, 4, 1, 3, 3, 2, 4, 3,
|
||||
3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 5, 3, 0, 4, 1, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 2, 2, 1, 0, 1, 0, 0, 0, 3, 0, 3,
|
||||
0, 3, 0, 1, 3, 1, 0, 3, 1, 3, 3, 3, 1, 3, 3, 3, 0, 1, 3, 1, 3,
|
||||
4, 0, 0, 3, 1, 1, 0, 3, 2, 0, 0, 0, 0, 1, 3, 0, 1, 0, 0, 3, 3,
|
||||
2, 0, 3, 0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 3, 0, 3, 0, 0, 2, 3,
|
||||
},
|
||||
{
|
||||
2, 3, 0, 3, 0, 2, 0, 1, 0, 3, 3, 4, 3, 1, 3, 1, 1, 1, 3, 1, 4,
|
||||
3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 4, 3, 1, 4, 3, 2, 5, 5, 4, 4, 4,
|
||||
4, 3, 3, 4, 4, 4, 0, 2, 1, 1, 3, 2, 0, 1, 2, 0, 0, 1, 0, 4, 1,
|
||||
3, 3, 3, 0, 3, 0, 1, 0, 4, 4, 4, 5, 5, 3, 0, 2, 0, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 1, 0, 3, 1, 3, 0, 2, 3, 3, 3, 0, 3, 1, 0, 0, 3, 0, 3,
|
||||
2, 3, 1, 3, 2, 1, 1, 0, 0, 4, 2, 1, 0, 2, 3, 1, 4, 3, 2, 0, 4,
|
||||
4, 3, 1, 3, 1, 3, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1,
|
||||
1, 1, 2, 0, 3, 0, 0, 0, 3, 4, 2, 4, 3, 2, 0, 1, 0, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 4, 0, 5, 0, 4, 0, 2, 4, 4, 2, 3, 3, 2, 3, 3, 5, 3, 3,
|
||||
3, 4, 3, 4, 2, 3, 0, 4, 3, 3, 3, 4, 1, 4, 3, 2, 1, 5, 5, 3, 4,
|
||||
5, 1, 3, 5, 4, 2, 0, 3, 3, 0, 1, 3, 0, 4, 2, 0, 1, 3, 1, 4, 3,
|
||||
3, 3, 3, 0, 3, 0, 1, 0, 3, 4, 4, 4, 5, 5, 0, 3, 0, 1, 4, 5,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 3, 1, 3, 0, 4, 0, 1, 1, 3, 0, 3,
|
||||
4, 3, 2, 3, 1, 0, 3, 3, 2, 3, 1, 3, 0, 2, 3, 0, 2, 1, 4, 1, 2,
|
||||
2, 0, 0, 3, 3, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 3, 2,
|
||||
1, 3, 3, 0, 2, 0, 2, 0, 0, 3, 3, 1, 2, 4, 0, 3, 0, 2, 2, 3,
|
||||
},
|
||||
{
|
||||
2, 4, 0, 5, 0, 4, 0, 4, 0, 2, 4, 4, 4, 3, 4, 3, 3, 3, 1, 2, 4,
|
||||
3, 4, 3, 4, 4, 5, 0, 3, 3, 3, 3, 2, 0, 4, 3, 1, 4, 3, 4, 1, 4,
|
||||
4, 3, 3, 4, 4, 3, 1, 2, 3, 0, 4, 2, 0, 4, 1, 0, 3, 3, 0, 4, 3,
|
||||
3, 3, 4, 0, 4, 0, 2, 0, 3, 5, 3, 4, 5, 2, 0, 3, 0, 0, 4, 5,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 4, 0, 1, 0, 1, 0, 1, 3, 2, 2, 1, 3, 0, 3, 0, 2, 0, 2,
|
||||
0, 3, 0, 2, 0, 0, 0, 1, 0, 1, 1, 0, 0, 3, 1, 0, 0, 0, 4, 0, 3,
|
||||
1, 0, 2, 1, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 2,
|
||||
2, 3, 1, 0, 3, 0, 0, 0, 1, 4, 4, 4, 3, 0, 0, 4, 0, 0, 1, 4,
|
||||
},
|
||||
{
|
||||
1, 4, 1, 5, 0, 3, 0, 3, 0, 4, 5, 4, 4, 3, 5, 3, 3, 4, 4, 3, 4,
|
||||
1, 3, 3, 3, 3, 2, 1, 4, 1, 5, 4, 3, 1, 4, 4, 3, 5, 4, 4, 3, 5,
|
||||
4, 3, 3, 4, 4, 4, 0, 3, 3, 1, 2, 3, 0, 3, 1, 0, 3, 3, 0, 5, 4,
|
||||
4, 4, 4, 4, 4, 3, 3, 5, 4, 4, 3, 3, 5, 4, 0, 3, 2, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 1, 0, 0, 0, 1, 3, 3, 3, 2, 4, 1, 3, 0, 3, 1, 3,
|
||||
0, 2, 2, 1, 1, 0, 0, 2, 0, 4, 3, 1, 0, 4, 3, 0, 4, 4, 4, 1, 4,
|
||||
3, 1, 1, 3, 3, 1, 0, 2, 0, 0, 1, 3, 0, 0, 0, 0, 2, 0, 0, 4, 3,
|
||||
2, 4, 3, 5, 4, 3, 3, 3, 4, 3, 3, 4, 3, 3, 0, 2, 1, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 4, 0, 3, 0, 2, 0, 2, 5, 5, 3, 4, 4, 4, 4, 1, 4, 3, 3,
|
||||
0, 4, 3, 4, 3, 1, 3, 3, 2, 4, 3, 0, 3, 4, 3, 0, 3, 4, 4, 2, 4,
|
||||
4, 0, 4, 5, 3, 3, 2, 2, 1, 1, 1, 2, 0, 1, 5, 0, 3, 3, 2, 4, 3,
|
||||
3, 3, 4, 0, 3, 0, 2, 0, 4, 4, 3, 5, 5, 0, 0, 3, 0, 2, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 4, 0, 3, 0, 1, 0, 3, 4, 3, 3, 1, 3, 3, 3, 0, 3, 1, 3,
|
||||
0, 4, 3, 3, 1, 1, 0, 3, 0, 3, 3, 0, 0, 4, 4, 0, 1, 5, 4, 3, 3,
|
||||
5, 0, 3, 3, 4, 3, 0, 2, 0, 1, 1, 1, 0, 1, 3, 0, 1, 2, 1, 3, 3,
|
||||
2, 3, 3, 0, 3, 0, 1, 0, 1, 3, 3, 4, 4, 1, 0, 1, 2, 2, 1, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 4, 0, 4, 0, 3, 0, 1, 3, 3, 3, 2, 3, 1, 1, 0, 3, 0, 3,
|
||||
3, 4, 3, 2, 4, 2, 0, 1, 0, 4, 3, 2, 0, 4, 3, 0, 5, 3, 3, 2, 4,
|
||||
4, 4, 3, 3, 3, 4, 0, 1, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 4, 2,
|
||||
3, 3, 3, 0, 3, 0, 0, 0, 4, 4, 4, 5, 3, 2, 0, 3, 3, 0, 3, 5,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 0, 0, 3, 0, 1, 3, 0, 2, 0, 0, 0, 1, 0, 3, 1, 1,
|
||||
3, 3, 0, 0, 3, 0, 0, 3, 0, 2, 3, 1, 0, 3, 1, 0, 3, 3, 2, 0, 4,
|
||||
2, 2, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1,
|
||||
2, 0, 1, 0, 1, 0, 0, 0, 1, 3, 1, 2, 0, 0, 0, 1, 0, 0, 1, 4,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 5, 0, 1, 0, 2, 4, 3, 1, 3, 3, 2, 1, 1, 5, 2, 1,
|
||||
0, 5, 1, 2, 0, 0, 0, 3, 3, 2, 2, 3, 2, 4, 3, 0, 0, 3, 3, 1, 3,
|
||||
3, 0, 2, 5, 3, 4, 0, 3, 3, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0, 2, 2,
|
||||
3, 3, 3, 0, 2, 0, 1, 0, 3, 4, 4, 2, 5, 4, 0, 3, 0, 0, 3, 5,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 3, 0, 1, 0, 3, 3, 3, 3, 0, 3, 0, 2, 0, 2, 1, 1,
|
||||
0, 2, 0, 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 3, 2, 0, 0, 3, 3, 1, 2,
|
||||
3, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 3,
|
||||
1, 2, 3, 0, 3, 0, 1, 0, 3, 2, 1, 0, 4, 3, 0, 1, 1, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 5, 0, 3, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 4, 3, 5, 3, 3,
|
||||
2, 5, 3, 4, 4, 4, 3, 4, 3, 4, 5, 5, 3, 4, 4, 3, 4, 4, 5, 4, 4,
|
||||
4, 3, 4, 5, 5, 4, 2, 3, 4, 2, 3, 4, 0, 3, 3, 1, 4, 3, 2, 4, 3,
|
||||
3, 5, 5, 0, 3, 0, 3, 0, 5, 5, 5, 5, 4, 4, 0, 4, 0, 1, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 5, 4, 4, 2, 3, 2, 5, 1, 3, 2, 5,
|
||||
1, 4, 2, 3, 2, 3, 3, 4, 3, 3, 3, 3, 2, 5, 4, 1, 3, 3, 5, 3, 4,
|
||||
4, 0, 4, 4, 3, 1, 1, 3, 1, 0, 2, 3, 0, 2, 3, 0, 3, 0, 0, 4, 3,
|
||||
1, 3, 4, 0, 3, 0, 2, 0, 4, 4, 4, 3, 4, 5, 0, 4, 0, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 3, 1, 2, 0, 3, 4, 4, 3, 3, 3, 0, 2, 2, 4, 3, 3,
|
||||
1, 3, 3, 3, 1, 1, 0, 3, 1, 4, 3, 2, 3, 4, 4, 2, 4, 4, 4, 3, 4,
|
||||
4, 3, 2, 4, 4, 3, 1, 3, 3, 1, 3, 3, 0, 4, 1, 0, 2, 2, 1, 4, 3,
|
||||
2, 3, 3, 5, 4, 3, 3, 5, 4, 4, 3, 3, 0, 4, 0, 3, 2, 2, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 2, 1, 3, 0, 0, 0, 0, 0, 2, 0, 1,
|
||||
2, 1, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 1, 3, 1, 0, 0, 0,
|
||||
1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||
2, 2, 0, 3, 4, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 4, 1, 4, 0, 3, 0, 4, 0, 3,
|
||||
0, 4, 0, 3, 0, 3, 0, 4, 1, 5, 1, 4, 0, 0, 3, 0, 5, 0, 5, 2, 0,
|
||||
1, 0, 0, 0, 2, 1, 4, 0, 1, 3, 0, 0, 3, 0, 0, 3, 1, 1, 4, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
},
|
||||
{
|
||||
1, 4, 0, 5, 0, 3, 0, 2, 0, 3, 5, 4, 4, 3, 4, 3, 5, 3, 4, 3, 3,
|
||||
0, 4, 3, 3, 3, 3, 3, 3, 2, 4, 4, 3, 1, 3, 4, 4, 5, 4, 4, 3, 4,
|
||||
4, 1, 3, 5, 4, 3, 3, 3, 1, 2, 2, 3, 3, 1, 3, 1, 3, 3, 3, 5, 3,
|
||||
3, 4, 5, 0, 3, 0, 3, 0, 3, 4, 3, 4, 4, 3, 0, 3, 0, 2, 4, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 4, 0, 0, 0, 0, 0, 1, 4, 0, 4, 1, 4, 2, 4, 0, 3, 0, 1,
|
||||
0, 1, 0, 0, 0, 0, 0, 2, 0, 3, 1, 1, 1, 0, 3, 0, 0, 0, 1, 2, 1,
|
||||
0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 3, 2,
|
||||
0, 2, 2, 0, 1, 0, 0, 0, 2, 3, 2, 3, 3, 0, 0, 0, 0, 2, 1, 0,
|
||||
},
|
||||
{
|
||||
0, 5, 1, 5, 0, 3, 0, 3, 0, 5, 4, 4, 5, 1, 5, 3, 3, 0, 4, 3, 4,
|
||||
3, 5, 3, 4, 3, 3, 2, 4, 3, 4, 3, 3, 0, 3, 3, 1, 4, 4, 3, 4, 4,
|
||||
4, 3, 4, 5, 5, 3, 2, 3, 1, 1, 3, 3, 1, 3, 1, 1, 3, 3, 2, 4, 5,
|
||||
3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 5, 3, 3, 0, 3, 4, 0, 4, 3,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 5, 0, 3, 0, 2, 0, 4, 4, 3, 5, 2, 4, 3, 3, 3, 4, 4, 4,
|
||||
3, 5, 3, 5, 3, 3, 1, 4, 0, 4, 3, 3, 0, 3, 3, 0, 4, 4, 4, 4, 5,
|
||||
4, 3, 3, 5, 5, 3, 2, 3, 1, 2, 3, 2, 0, 1, 0, 0, 3, 2, 2, 4, 4,
|
||||
3, 1, 5, 0, 4, 0, 3, 0, 4, 3, 1, 3, 2, 1, 0, 3, 3, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 5, 5, 3, 4, 3, 3, 2, 5, 4, 4,
|
||||
3, 5, 3, 5, 3, 4, 0, 4, 3, 4, 4, 3, 2, 4, 4, 3, 4, 5, 4, 4, 5,
|
||||
5, 0, 3, 5, 5, 4, 1, 3, 3, 2, 3, 3, 1, 3, 1, 0, 4, 3, 1, 4, 4,
|
||||
3, 4, 5, 0, 4, 0, 2, 0, 4, 3, 4, 4, 3, 3, 0, 4, 0, 0, 5, 5,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 5, 0, 1, 1, 3, 3, 4, 4, 3, 4, 1, 3, 0, 5, 1, 3,
|
||||
0, 3, 1, 3, 1, 1, 0, 3, 0, 3, 3, 4, 0, 4, 3, 0, 4, 4, 4, 3, 4,
|
||||
4, 0, 3, 5, 4, 1, 0, 3, 0, 0, 2, 3, 0, 3, 1, 0, 3, 1, 0, 3, 2,
|
||||
1, 3, 5, 0, 3, 0, 1, 0, 3, 2, 3, 3, 4, 4, 0, 2, 2, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
2, 4, 0, 5, 0, 4, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 5, 3, 5, 3, 5,
|
||||
2, 5, 3, 4, 3, 3, 4, 3, 4, 5, 3, 2, 1, 5, 4, 3, 2, 3, 4, 5, 3,
|
||||
4, 1, 2, 5, 4, 3, 0, 3, 3, 0, 3, 2, 0, 2, 3, 0, 4, 1, 0, 3, 4,
|
||||
3, 3, 5, 0, 3, 0, 1, 0, 4, 5, 5, 5, 4, 3, 0, 4, 2, 0, 3, 5,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 4, 0, 4, 0, 2, 0, 5, 4, 3, 4, 3, 4, 3, 3, 3, 4, 3, 4,
|
||||
2, 5, 3, 5, 3, 4, 1, 4, 3, 4, 4, 4, 0, 3, 5, 0, 4, 4, 4, 4, 5,
|
||||
3, 1, 3, 4, 5, 3, 3, 3, 3, 3, 3, 3, 0, 2, 2, 0, 3, 3, 2, 4, 3,
|
||||
3, 3, 5, 3, 4, 1, 3, 3, 5, 3, 2, 0, 0, 0, 0, 4, 3, 1, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 3, 0, 3, 0, 1, 0, 1, 3, 3, 3, 2, 3, 3, 3, 0, 3, 0, 0,
|
||||
0, 3, 1, 3, 0, 0, 0, 2, 2, 2, 3, 0, 0, 3, 2, 0, 1, 2, 4, 1, 3,
|
||||
3, 0, 0, 3, 3, 3, 0, 1, 0, 0, 2, 1, 0, 0, 3, 0, 3, 1, 0, 3, 0,
|
||||
0, 1, 3, 0, 2, 0, 1, 0, 3, 3, 1, 3, 3, 0, 0, 1, 1, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 2, 1, 4, 0, 2, 2, 3, 1, 1, 3, 1, 1, 0, 2, 0, 3,
|
||||
1, 2, 3, 1, 3, 0, 0, 1, 0, 4, 3, 2, 3, 3, 3, 1, 4, 2, 3, 3, 3,
|
||||
3, 1, 0, 3, 1, 4, 0, 1, 1, 0, 1, 2, 0, 1, 1, 0, 1, 1, 0, 3, 1,
|
||||
3, 2, 2, 0, 1, 0, 0, 0, 2, 3, 3, 3, 1, 0, 0, 0, 0, 0, 2, 3,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 5, 5, 3, 3, 4, 3, 3, 1, 5, 4, 4,
|
||||
2, 4, 4, 4, 3, 4, 2, 4, 3, 5, 5, 4, 3, 3, 4, 3, 3, 5, 5, 4, 5,
|
||||
5, 1, 3, 4, 5, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 1, 4, 5,
|
||||
3, 3, 5, 0, 4, 0, 3, 0, 5, 3, 3, 1, 4, 3, 0, 4, 0, 1, 5, 3,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 5, 0, 4, 0, 2, 0, 4, 4, 3, 4, 3, 3, 3, 3, 3, 5, 4, 4,
|
||||
4, 4, 4, 4, 5, 3, 3, 5, 2, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4, 5, 5,
|
||||
3, 3, 4, 3, 4, 3, 3, 4, 3, 3, 3, 3, 1, 2, 2, 1, 4, 3, 3, 5, 4,
|
||||
4, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 4, 4, 1, 0, 4, 2, 0, 2, 4,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 3, 0, 1, 0, 3, 5, 2, 3, 0, 3, 0, 2, 1, 4, 2, 3,
|
||||
3, 4, 1, 4, 3, 3, 2, 4, 1, 3, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 5,
|
||||
3, 3, 3, 3, 3, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 0, 0, 3, 1,
|
||||
2, 2, 3, 0, 3, 0, 2, 0, 4, 4, 3, 3, 4, 1, 0, 3, 0, 0, 2, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1,
|
||||
0, 2, 0, 1, 0, 0, 0, 0, 0, 3, 1, 3, 0, 3, 2, 0, 0, 0, 1, 0, 3,
|
||||
2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 2, 0, 0, 0, 0, 0, 0, 2,
|
||||
},
|
||||
{
|
||||
0, 2, 1, 3, 0, 2, 0, 2, 0, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 3,
|
||||
3, 4, 2, 2, 1, 2, 1, 4, 0, 4, 3, 1, 3, 3, 3, 2, 4, 3, 5, 4, 3,
|
||||
3, 3, 3, 3, 3, 3, 0, 1, 3, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 4, 2,
|
||||
0, 2, 3, 0, 3, 3, 0, 3, 3, 4, 2, 3, 1, 4, 0, 1, 2, 0, 2, 3,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 1, 0, 3, 0, 2, 3, 3, 3, 0, 3, 1, 2, 0, 3, 3, 2,
|
||||
3, 3, 2, 3, 2, 3, 1, 3, 0, 4, 3, 2, 0, 3, 3, 1, 4, 3, 3, 2, 3,
|
||||
4, 3, 1, 3, 3, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 4, 1,
|
||||
1, 0, 3, 0, 3, 1, 0, 2, 3, 3, 3, 3, 3, 1, 0, 0, 2, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0,
|
||||
0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 1, 0, 1, 0, 1, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 3, 0, 2, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 1, 3, 0, 3, 0, 2, 3, 3, 3, 1, 3, 1, 3, 1, 3, 1, 3,
|
||||
3, 3, 1, 3, 0, 2, 3, 1, 1, 4, 3, 3, 2, 3, 3, 1, 2, 2, 4, 1, 3,
|
||||
3, 0, 1, 4, 2, 3, 0, 1, 3, 0, 3, 0, 0, 1, 3, 0, 2, 0, 0, 3, 3,
|
||||
2, 1, 3, 0, 3, 0, 2, 0, 3, 4, 4, 4, 3, 1, 0, 3, 0, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 1, 0, 2, 0, 0, 0, 1, 3, 2, 2, 1, 3, 0, 1, 1, 3, 0, 3,
|
||||
2, 3, 1, 2, 0, 2, 0, 1, 1, 3, 3, 3, 0, 3, 3, 1, 1, 2, 3, 2, 3,
|
||||
3, 1, 2, 3, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 2, 1,
|
||||
2, 1, 3, 0, 3, 0, 0, 0, 3, 4, 4, 4, 3, 2, 0, 2, 0, 0, 2, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 3,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 2, 0, 3, 0, 3, 3, 3, 2, 3, 2, 2, 2, 0, 3, 1, 3,
|
||||
3, 3, 2, 3, 3, 0, 0, 3, 0, 3, 2, 2, 0, 2, 3, 1, 4, 3, 4, 3, 3,
|
||||
2, 3, 1, 5, 4, 4, 0, 3, 1, 2, 1, 3, 0, 3, 1, 1, 2, 0, 2, 3, 1,
|
||||
3, 1, 3, 0, 3, 0, 1, 0, 3, 3, 4, 4, 2, 1, 0, 2, 1, 0, 2, 4,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 4, 2, 5, 1, 4, 0, 2, 0, 2, 1, 3,
|
||||
1, 4, 0, 2, 1, 0, 0, 2, 1, 4, 1, 1, 0, 3, 3, 0, 5, 1, 3, 2, 3,
|
||||
3, 1, 0, 3, 2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0,
|
||||
1, 0, 3, 0, 2, 0, 1, 0, 3, 3, 3, 4, 3, 3, 0, 0, 0, 0, 2, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 3, 0, 4, 0, 3, 0, 2, 4, 3, 1, 0, 3, 2, 2, 1, 3, 1, 2,
|
||||
2, 3, 1, 1, 1, 2, 1, 3, 0, 1, 2, 0, 1, 3, 2, 1, 3, 0, 5, 5, 1,
|
||||
0, 0, 1, 3, 2, 1, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 3, 4, 0, 1, 1,
|
||||
1, 3, 2, 0, 2, 0, 1, 0, 2, 3, 3, 1, 2, 3, 0, 1, 0, 1, 0, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 1, 0, 3, 0, 3, 0, 2, 2, 1, 0, 0, 4, 0, 3, 0, 3, 1, 3,
|
||||
0, 3, 0, 3, 0, 1, 0, 3, 0, 3, 1, 3, 0, 3, 3, 0, 0, 1, 2, 1, 1,
|
||||
1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
|
||||
1, 2, 0, 0, 2, 0, 0, 0, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 3, 0, 1, 0, 2, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 2, 0, 2, 3, 0, 0, 2, 2, 3, 1,
|
||||
2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 2, 0, 0, 0, 0, 2, 3,
|
||||
},
|
||||
{
|
||||
2, 4, 0, 5, 0, 5, 0, 4, 0, 3, 4, 3, 3, 3, 4, 3, 3, 3, 4, 3, 4,
|
||||
4, 5, 4, 5, 5, 5, 2, 3, 0, 5, 5, 4, 1, 5, 4, 3, 1, 5, 4, 3, 4,
|
||||
4, 3, 3, 4, 3, 3, 0, 3, 2, 0, 2, 3, 0, 3, 0, 0, 3, 3, 0, 5, 3,
|
||||
2, 3, 3, 0, 3, 0, 3, 0, 3, 4, 5, 4, 5, 3, 0, 4, 3, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 3, 4, 3, 2, 3, 2, 3, 0, 4, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 0, 3, 2, 4, 3, 3, 1, 3, 4, 3, 4, 4, 4, 3, 4,
|
||||
4, 3, 2, 4, 4, 1, 0, 2, 0, 0, 1, 1, 0, 2, 0, 0, 3, 1, 0, 5, 3,
|
||||
2, 1, 3, 0, 3, 0, 1, 2, 4, 3, 2, 4, 3, 3, 0, 3, 2, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 1, 0, 0, 0, 1, 4, 3, 3, 2, 3, 1, 3, 1, 4, 2, 3,
|
||||
2, 4, 2, 3, 4, 3, 0, 2, 2, 3, 3, 3, 0, 3, 3, 3, 0, 3, 4, 1, 3,
|
||||
3, 0, 3, 4, 3, 3, 0, 1, 1, 0, 1, 0, 0, 0, 4, 0, 3, 0, 0, 3, 1,
|
||||
2, 1, 3, 0, 4, 0, 1, 0, 4, 3, 3, 4, 3, 3, 0, 2, 0, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 4, 0, 1, 0, 3, 0, 3, 4, 3, 3, 0, 3, 3, 3, 1, 3, 1, 3,
|
||||
3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 3, 3, 1, 3, 3, 2, 5, 4, 3, 3, 4,
|
||||
5, 3, 2, 5, 3, 4, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 4, 2,
|
||||
2, 1, 3, 0, 3, 0, 2, 0, 4, 4, 3, 5, 3, 2, 0, 1, 1, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 4, 3, 3, 2, 3, 3, 3, 1, 4, 3, 4,
|
||||
1, 5, 3, 4, 3, 4, 0, 4, 2, 4, 3, 4, 1, 5, 4, 0, 4, 4, 4, 4, 5,
|
||||
4, 1, 3, 5, 4, 2, 1, 4, 1, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 4, 3,
|
||||
3, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 3, 3, 3, 0, 4, 2, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
1, 4, 0, 4, 0, 3, 0, 1, 0, 3, 3, 3, 1, 1, 3, 3, 2, 2, 3, 3, 1,
|
||||
0, 3, 2, 2, 1, 2, 0, 3, 1, 2, 1, 2, 0, 3, 2, 0, 2, 2, 3, 3, 4,
|
||||
3, 0, 3, 3, 1, 2, 0, 1, 1, 3, 1, 2, 0, 0, 3, 0, 1, 1, 0, 3, 2,
|
||||
2, 3, 3, 0, 3, 0, 0, 0, 2, 3, 3, 4, 3, 3, 0, 1, 0, 0, 1, 4,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 4, 0, 0, 0, 3, 4, 4, 3, 1, 4, 2, 3, 2, 3, 3, 3,
|
||||
1, 4, 3, 4, 0, 3, 0, 4, 2, 3, 3, 2, 2, 5, 4, 2, 1, 3, 4, 3, 4,
|
||||
3, 1, 3, 3, 4, 2, 0, 2, 1, 0, 3, 3, 0, 0, 2, 0, 3, 1, 0, 4, 4,
|
||||
3, 4, 3, 0, 4, 0, 1, 0, 2, 4, 4, 4, 4, 4, 0, 3, 2, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 0, 0, 1, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 4, 0, 4, 0, 1, 3, 3, 3, 0, 4, 0, 2, 1, 2, 1, 1,
|
||||
1, 2, 0, 3, 1, 1, 0, 1, 0, 3, 1, 0, 0, 3, 3, 2, 0, 1, 1, 0, 0,
|
||||
0, 0, 0, 1, 0, 2, 0, 2, 2, 0, 3, 1, 0, 0, 1, 0, 1, 1, 0, 1, 2,
|
||||
0, 3, 0, 0, 0, 0, 1, 0, 0, 3, 3, 4, 3, 1, 0, 1, 0, 3, 0, 2,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 3, 0, 5, 0, 0, 0, 0, 1, 0, 2, 0, 3, 1, 0, 1, 3, 0, 0,
|
||||
0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 4, 0, 0, 0, 2, 3, 0, 1,
|
||||
4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0,
|
||||
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 5, 0, 5, 0, 1, 0, 2, 4, 3, 3, 2, 5, 1, 3, 2, 3, 3, 3,
|
||||
0, 4, 1, 2, 0, 3, 0, 4, 0, 2, 2, 1, 1, 5, 3, 0, 0, 1, 4, 2, 3,
|
||||
2, 0, 3, 3, 3, 2, 0, 2, 4, 1, 1, 2, 0, 1, 1, 0, 3, 1, 0, 1, 3,
|
||||
1, 2, 3, 0, 2, 0, 0, 0, 1, 3, 5, 4, 4, 4, 0, 3, 0, 0, 1, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 5, 0, 4, 0, 4, 0, 4, 5, 4, 3, 3, 4, 3, 3, 3, 4, 3, 4,
|
||||
4, 5, 3, 4, 5, 4, 2, 4, 2, 3, 4, 3, 1, 4, 4, 1, 3, 5, 4, 4, 5,
|
||||
5, 4, 4, 5, 5, 5, 2, 3, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3,
|
||||
4, 4, 4, 0, 3, 0, 4, 0, 3, 3, 4, 4, 5, 0, 0, 4, 3, 0, 4, 5,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 4, 4, 4, 3, 3, 2, 4, 3, 4, 3, 4,
|
||||
3, 5, 3, 4, 3, 2, 1, 4, 2, 4, 4, 3, 1, 3, 4, 2, 4, 5, 5, 3, 4,
|
||||
5, 4, 1, 5, 4, 3, 0, 3, 2, 2, 3, 2, 1, 3, 1, 0, 3, 3, 3, 5, 3,
|
||||
3, 3, 5, 4, 4, 2, 3, 3, 4, 3, 3, 3, 2, 1, 0, 3, 2, 1, 4, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 5, 0, 4, 0, 3, 0, 3, 5, 5, 3, 2, 4, 3, 4, 0, 5, 4, 4,
|
||||
1, 4, 4, 4, 3, 3, 3, 4, 3, 5, 5, 2, 3, 3, 4, 1, 2, 5, 5, 3, 5,
|
||||
5, 2, 3, 5, 5, 4, 0, 3, 2, 0, 3, 3, 1, 1, 5, 1, 4, 1, 0, 4, 3,
|
||||
2, 3, 5, 0, 4, 0, 3, 0, 5, 4, 3, 4, 3, 0, 0, 4, 1, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
1, 3, 0, 4, 0, 2, 0, 2, 0, 2, 5, 5, 3, 3, 3, 3, 3, 0, 4, 2, 3,
|
||||
4, 4, 4, 3, 4, 0, 0, 3, 4, 5, 4, 3, 3, 3, 3, 2, 5, 5, 4, 5, 5,
|
||||
5, 4, 3, 5, 5, 5, 1, 3, 1, 0, 1, 0, 0, 3, 2, 0, 4, 2, 0, 5, 2,
|
||||
3, 2, 4, 1, 3, 0, 3, 0, 4, 5, 4, 5, 4, 3, 0, 4, 2, 0, 5, 4,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 4, 0, 5, 0, 3, 0, 3, 4, 4, 3, 2, 3, 2, 3, 3, 3, 3, 3,
|
||||
2, 4, 3, 3, 2, 2, 0, 3, 3, 3, 3, 3, 1, 3, 3, 3, 0, 4, 4, 3, 4,
|
||||
4, 1, 1, 4, 4, 2, 0, 3, 1, 0, 1, 1, 0, 4, 1, 0, 2, 3, 1, 3, 3,
|
||||
1, 3, 4, 0, 3, 0, 1, 0, 3, 1, 3, 0, 0, 1, 0, 2, 0, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 2, 0, 3, 0, 1, 5, 4, 3, 3, 3, 1, 4, 2, 1, 2, 3,
|
||||
4, 4, 2, 4, 4, 5, 0, 3, 1, 4, 3, 4, 0, 4, 3, 3, 3, 2, 3, 2, 5,
|
||||
3, 4, 3, 2, 2, 3, 0, 0, 3, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1,
|
||||
1, 3, 1, 0, 2, 0, 4, 0, 3, 4, 4, 4, 5, 2, 0, 2, 0, 0, 1, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
|
||||
1, 1, 0, 0, 1, 1, 0, 0, 0, 4, 2, 1, 1, 0, 1, 0, 3, 2, 0, 0, 3,
|
||||
1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
|
||||
1, 0, 0, 0, 2, 0, 0, 0, 1, 4, 0, 4, 2, 1, 0, 0, 0, 0, 0, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 1,
|
||||
2, 1, 0, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3,
|
||||
1, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 4, 0, 3, 0, 4, 4, 3, 4, 2, 4, 3, 2, 0, 4, 4, 4,
|
||||
3, 5, 3, 5, 3, 3, 2, 4, 2, 4, 3, 4, 3, 1, 4, 0, 2, 3, 4, 4, 4,
|
||||
3, 3, 3, 4, 4, 4, 3, 4, 1, 3, 4, 3, 2, 1, 2, 1, 3, 3, 3, 4, 4,
|
||||
3, 3, 5, 0, 4, 0, 3, 0, 4, 3, 3, 3, 2, 1, 0, 3, 0, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 3, 0, 3, 0, 3, 0, 3, 5, 5, 3, 3, 3, 3, 4, 3, 4, 3, 3,
|
||||
3, 4, 4, 4, 3, 3, 3, 3, 4, 3, 5, 3, 3, 1, 3, 2, 4, 5, 5, 5, 5,
|
||||
4, 3, 4, 5, 5, 3, 2, 2, 3, 3, 3, 3, 2, 3, 3, 1, 2, 3, 2, 4, 3,
|
||||
3, 3, 4, 0, 4, 0, 2, 0, 4, 3, 2, 2, 1, 2, 0, 3, 0, 0, 4, 1,
|
||||
},
|
||||
};
|
||||
|
||||
#define MINIMUM_DATA_THRESHOLD 4
|
||||
|
||||
void JapaneseContextAnalysis::HandleData(const char* aBuf, uint32_t aLen) {
|
||||
uint32_t charLen;
|
||||
int32_t order;
|
||||
uint32_t i;
|
||||
|
||||
if (mDone) return;
|
||||
|
||||
// The buffer we got is byte oriented, and a character may span in more than
|
||||
// one buffers. In case the last one or two byte in last buffer is not
|
||||
// complete, we record how many byte needed to complete that character and
|
||||
// skip these bytes here. We can choose to record those bytes as well and
|
||||
// analyse the character once it is complete, but since a character will not
|
||||
// make much difference, by simply skipping this character will simply our
|
||||
// logic and improve performance.
|
||||
for (i = mNeedToSkipCharNum; i < aLen;) {
|
||||
order = GetOrder(aBuf + i, &charLen);
|
||||
i += charLen;
|
||||
if (i > aLen) {
|
||||
mNeedToSkipCharNum = i - aLen;
|
||||
mLastCharOrder = -1;
|
||||
} else {
|
||||
if (order != -1 && mLastCharOrder != -1) {
|
||||
mTotalRel++;
|
||||
if (mTotalRel > MAX_REL_THRESHOLD) {
|
||||
mDone = true;
|
||||
break;
|
||||
}
|
||||
mRelSample[jp2CharContext[mLastCharOrder][order]]++;
|
||||
}
|
||||
mLastCharOrder = order;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void JapaneseContextAnalysis::Reset() {
|
||||
mTotalRel = 0;
|
||||
for (uint32_t i = 0; i < NUM_OF_CATEGORY; i++) mRelSample[i] = 0;
|
||||
mNeedToSkipCharNum = 0;
|
||||
mLastCharOrder = -1;
|
||||
mDone = false;
|
||||
mDataThreshold = 0;
|
||||
}
|
||||
#define DONT_KNOW (float)-1
|
||||
|
||||
float JapaneseContextAnalysis::GetConfidence(void) {
|
||||
// This is just one way to calculate confidence. It works well for me.
|
||||
if (mTotalRel > mDataThreshold)
|
||||
return ((float)(mTotalRel - mRelSample[0])) / mTotalRel;
|
||||
else
|
||||
return (float)DONT_KNOW;
|
||||
}
|
||||
|
||||
// Classifies the character starting at str.
//
//   str      first byte of the character; the second byte is only inspected
//            when the first byte is 0x82
//   charLen  out: byte length of the character (1 or 2)
//
// Returns the character's index into the hiragana table (second byte minus
// 0x9f) when it is 0x82 followed by 0x9f..0xf1, otherwise -1.
int32_t SJISContextAnalysis::GetOrder(const char* str, uint32_t* charLen) {
  const unsigned char lead = (unsigned char)str[0];

  // Lead bytes 0x81..0x9f and 0xe0..0xfc start a two-byte character.
  const bool startsDoubleByte =
      (lead >= 0x81 && lead <= 0x9f) || (lead >= 0xe0 && lead <= 0xfc);
  *charLen = startsDoubleByte ? 2 : 1;

  // Only the 0x82 row can hold hiragana.
  if (lead != 0x82) {
    return -1;
  }
  const unsigned char trail = (unsigned char)str[1];
  if (trail < 0x9f || trail > 0xf1) {
    return -1;
  }
  return trail - 0x9f;
}
|
||||
|
||||
// Classifies the character starting at str.
//
//   str      first byte of the character; the second byte is only inspected
//            when the first byte is 0xa4
//   charLen  out: byte length of the character (1, 2 or 3)
//
// Returns the character's index into the hiragana table (second byte minus
// 0xa1) when it is 0xa4 followed by 0xa1..0xf3, otherwise -1.
int32_t EUCJPContextAnalysis::GetOrder(const char* str, uint32_t* charLen) {
  const unsigned char lead = (unsigned char)str[0];

  // 0x8f introduces a three-byte character; 0x8e and 0xa1..0xfe introduce a
  // two-byte character; everything else is a single byte.
  if (lead == 0x8f) {
    *charLen = 3;
  } else if (lead == 0x8e || (lead >= 0xa1 && lead <= 0xfe)) {
    *charLen = 2;
  } else {
    *charLen = 1;
  }

  // Only the 0xa4 row can hold hiragana.
  if (lead != 0xa4) {
    return -1;
  }
  const unsigned char trail = (unsigned char)str[1];
  if (trail < 0xa1 || trail > 0xf3) {
    return -1;
  }
  return trail - 0xa1;
}
|
|
@ -0,0 +1,97 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef __JPCNTX_H__
|
||||
#define __JPCNTX_H__
|
||||
|
||||
#define NUM_OF_CATEGORY 6
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
#define ENOUGH_REL_THRESHOLD 100
|
||||
#define MAX_REL_THRESHOLD 1000
|
||||
|
||||
// hiragana frequency category table
|
||||
extern const uint8_t jp2CharContext[83][83];
|
||||
|
||||
class JapaneseContextAnalysis {
|
||||
public:
|
||||
JapaneseContextAnalysis() { Reset(); }
|
||||
|
||||
void HandleData(const char* aBuf, uint32_t aLen);
|
||||
|
||||
void HandleOneChar(const char* aStr, uint32_t aCharLen) {
|
||||
int32_t order;
|
||||
|
||||
// if we received enough data, stop here
|
||||
if (mTotalRel > MAX_REL_THRESHOLD) mDone = true;
|
||||
if (mDone) return;
|
||||
|
||||
// Only 2-bytes characters are of our interest
|
||||
order = (aCharLen == 2) ? GetOrder(aStr) : -1;
|
||||
if (order != -1 && mLastCharOrder != -1) {
|
||||
mTotalRel++;
|
||||
// count this sequence to its category counter
|
||||
mRelSample[jp2CharContext[mLastCharOrder][order]]++;
|
||||
}
|
||||
mLastCharOrder = order;
|
||||
}
|
||||
|
||||
float GetConfidence(void);
|
||||
void Reset();
|
||||
bool GotEnoughData() { return mTotalRel > ENOUGH_REL_THRESHOLD; }
|
||||
|
||||
protected:
|
||||
virtual int32_t GetOrder(const char* str, uint32_t* charLen) = 0;
|
||||
virtual int32_t GetOrder(const char* str) = 0;
|
||||
|
||||
// category counters, each integer counts sequences in its category
|
||||
uint32_t mRelSample[NUM_OF_CATEGORY];
|
||||
|
||||
// total sequence received
|
||||
uint32_t mTotalRel;
|
||||
|
||||
// Number of sequences needed to trigger detection
|
||||
uint32_t mDataThreshold;
|
||||
|
||||
// The order of previous char
|
||||
int32_t mLastCharOrder;
|
||||
|
||||
// if last byte in current buffer is not the last byte of a character, we
|
||||
// need to know how many byte to skip in next buffer.
|
||||
uint32_t mNeedToSkipCharNum;
|
||||
|
||||
// If this flag is set to true, detection is done and conclusion has been made
|
||||
bool mDone;
|
||||
};
|
||||
|
||||
class SJISContextAnalysis : public JapaneseContextAnalysis {
|
||||
// SJISContextAnalysis(){};
|
||||
protected:
|
||||
int32_t GetOrder(const char* str, uint32_t* charLen) override;
|
||||
|
||||
int32_t GetOrder(const char* str) override {
|
||||
// We only interested in Hiragana, so first byte is '\202'
|
||||
if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f &&
|
||||
(unsigned char)*(str + 1) <= (unsigned char)0xf1)
|
||||
return (unsigned char)*(str + 1) - (unsigned char)0x9f;
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
class EUCJPContextAnalysis : public JapaneseContextAnalysis {
|
||||
protected:
|
||||
int32_t GetOrder(const char* str, uint32_t* charLen) override;
|
||||
int32_t GetOrder(const char* str) override
|
||||
// We only interested in Hiragana, so first byte is '\244'
|
||||
{
|
||||
if (*str == '\244' && (unsigned char)*(str + 1) >= (unsigned char)0xa1 &&
|
||||
(unsigned char)*(str + 1) <= (unsigned char)0xf3)
|
||||
return (unsigned char)*(str + 1) - (unsigned char)0xa1;
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
#endif /* __JPCNTX_H__ */
|
|
@ -0,0 +1,21 @@
|
|||
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
||||
# vim: set filetype=python:
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
# Translation units of the charset detector that are added to the unified
# build. Keep this list in its existing sorted order when adding files.
UNIFIED_SOURCES += [
|
||||
'CharDistribution.cpp',
|
||||
'JpCntx.cpp',
|
||||
'nsCharSetProber.cpp',
|
||||
'nsEscCharsetProber.cpp',
|
||||
'nsEscSM.cpp',
|
||||
'nsEUCJPProber.cpp',
|
||||
'nsMBCSGroupProber.cpp',
|
||||
'nsMBCSSM.cpp',
|
||||
'nsSJISProber.cpp',
|
||||
'nsUniversalDetector.cpp',
|
||||
'nsUTF8Prober.cpp',
|
||||
]
|
||||
|
||||
# Library these objects are linked into.
FINAL_LIBRARY = 'xul'
|
|
@ -0,0 +1,88 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
|
||||
// This filter applies to all scripts which do not use English characters
|
||||
// Filter for scripts that do not use English letters. The input is split
// into segments at every ASCII byte that is not a letter; a segment is copied
// to the output (followed by one space) only if it contains at least one byte
// with the high bit set. Pure-ASCII segments and lone symbols are dropped.
//
//   aBuf    input bytes
//   aLen    number of input bytes
//   newBuf  out: buffer allocated with malloc(aLen); the caller frees it
//   newLen  out: number of bytes written to *newBuf
//
// Returns false if the allocation fails (newLen is then left untouched),
// true otherwise.
bool nsCharSetProber::FilterWithoutEnglishLetters(const char* aBuf,
                                                  uint32_t aLen, char** newBuf,
                                                  uint32_t& newLen) {
  char* out = (char*)malloc(aLen);
  *newBuf = out;
  if (!out) {
    return false;
  }

  const char* const end = aBuf + aLen;
  const char* segStart = aBuf;  // first byte of the segment being scanned
  const char* cur = aBuf;
  bool sawHighByte = false;  // segment contains a byte with the high bit set

  for (; cur < end; ++cur) {
    const char ch = *cur;
    if (ch & 0x80) {
      sawHighByte = true;
      continue;
    }

    const bool isAsciiLetter =
        (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    if (isAsciiLetter) {
      continue;
    }

    // ch is a symbol, most likely punctuation: treat it as a delimiter.
    if (sawHighByte && cur > segStart) {
      // The segment is more than a single symbol and has upper-ASCII bytes,
      // so keep it, replacing the delimiter with a space.
      while (segStart < cur) {
        *out++ = *segStart++;
      }
      ++segStart;  // step over the delimiter itself
      *out++ = ' ';
      sawHighByte = false;
    } else {
      // Just a symbol or just an English word: ignore the segment.
      segStart = cur + 1;
    }
  }

  // Flush a trailing segment that was not terminated by a delimiter.
  if (sawHighByte && cur > segStart) {
    while (segStart < cur) {
      *out++ = *segStart++;
    }
  }

  newLen = out - *newBuf;
  return true;
}
|
||||
|
||||
// This filter applies to all scripts which contain both English characters and
|
||||
// upper ASCII characters.
|
||||
// Filter for scripts that mix English letters with upper-ASCII bytes; it
// reduces the load on the probers. The input is split into segments at every
// ASCII byte that is not a letter. A non-empty segment is copied to the
// output (followed by one space) unless the scanner is inside a '<' ... '>'
// tag at the moment its delimiter is reached.
//
//   aBuf    input bytes
//   aLen    number of input bytes
//   newBuf  out: buffer allocated with malloc(aLen); the caller frees it
//   newLen  out: number of bytes written to *newBuf
//
// Returns false if the allocation fails (newLen is then left untouched),
// true otherwise.
bool nsCharSetProber::FilterWithEnglishLetters(const char* aBuf, uint32_t aLen,
                                               char** newBuf,
                                               uint32_t& newLen) {
  char* out = (char*)malloc(aLen);
  *newBuf = out;
  if (!out) {
    return false;
  }

  const char* const end = aBuf + aLen;
  const char* segStart = aBuf;  // first byte of the segment being scanned
  const char* cur = aBuf;
  bool insideTag = false;

  for (; cur < end; ++cur) {
    const char ch = *cur;

    // Track tag state first; '<' and '>' are also delimiters below.
    if (ch == '>') {
      insideTag = false;
    } else if (ch == '<') {
      insideTag = true;
    }

    if (ch & 0x80) {
      continue;
    }
    const bool isAsciiLetter =
        (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    if (isAsciiLetter) {
      continue;
    }

    if (cur > segStart && !insideTag) {
      // The segment is more than just a symbol and is not inside a tag:
      // keep it, replacing the delimiter with a space.
      while (segStart < cur) {
        *out++ = *segStart++;
      }
      ++segStart;  // step over the delimiter itself
      *out++ = ' ';
    } else {
      segStart = cur + 1;
    }
  }

  // Flush a trailing segment unless the input ended inside a tag.
  if (!insideTag) {
    while (segStart < cur) {
      *out++ = *segStart++;
    }
  }

  newLen = out - *newBuf;
  return true;
}
|
|
@ -0,0 +1,44 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#ifndef nsCharSetProber_h__
|
||||
#define nsCharSetProber_h__
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
//#define DEBUG_chardet // Uncomment this for debug dump.
|
||||
|
||||
// Verdict a prober reports after looking at some data.
enum nsProbingState {
  // Still detecting: no sure answer yet, but the caller can ask for the
  // current confidence.
  eDetecting = 0,
  // Positive answer.
  eFoundIt = 1,
  // Negative answer.
  eNotMe = 2
};
|
||||
|
||||
#define SHORTCUT_THRESHOLD (float)0.95
|
||||
|
||||
class nsCharSetProber {
|
||||
public:
|
||||
virtual ~nsCharSetProber() {}
|
||||
virtual const char* GetCharSetName() = 0;
|
||||
virtual nsProbingState HandleData(const char* aBuf, uint32_t aLen) = 0;
|
||||
virtual nsProbingState GetState(void) = 0;
|
||||
virtual void Reset(void) = 0;
|
||||
virtual float GetConfidence(void) = 0;
|
||||
|
||||
#ifdef DEBUG_chardet
|
||||
virtual void DumpStatus(){};
|
||||
#endif
|
||||
|
||||
// Helper functions used in the Latin1 and Group probers.
|
||||
// both functions Allocate a new buffer for newBuf. This buffer should be
|
||||
// freed by the caller using free().
|
||||
// Both functions return false in case of memory allocation failure.
|
||||
static bool FilterWithoutEnglishLetters(const char* aBuf, uint32_t aLen,
|
||||
char** newBuf, uint32_t& newLen);
|
||||
static bool FilterWithEnglishLetters(const char* aBuf, uint32_t aLen,
|
||||
char** newBuf, uint32_t& newLen);
|
||||
};
|
||||
|
||||
#endif /* nsCharSetProber_h__ */
|
|
@ -0,0 +1,85 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#ifndef nsCodingStateMachine_h__
|
||||
#define nsCodingStateMachine_h__
|
||||
|
||||
#include "mozilla/ArrayUtils.h"
|
||||
|
||||
#include "nsPkgInt.h"
|
||||
|
||||
/* Apart from these 3 generic states, machine states are specific to
|
||||
* each charset prober.
|
||||
*/
|
||||
#define eStart 0
|
||||
#define eError 1
|
||||
#define eItsMe 2
|
||||
|
||||
#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
|
||||
|
||||
// state machine model
|
||||
typedef struct {
|
||||
nsPkgInt classTable;
|
||||
uint32_t classFactor;
|
||||
nsPkgInt stateTable;
|
||||
const uint32_t* charLenTable;
|
||||
#ifdef DEBUG
|
||||
const size_t charLenTableLength;
|
||||
#endif
|
||||
const char* name;
|
||||
} SMModel;
|
||||
|
||||
class nsCodingStateMachine {
|
||||
public:
|
||||
explicit nsCodingStateMachine(const SMModel* sm) : mModel(sm) {
|
||||
mCurrentState = eStart;
|
||||
}
|
||||
uint32_t NextState(char c) {
|
||||
// for each byte we get its class , if it is first byte, we also get byte
|
||||
// length
|
||||
uint32_t byteCls = GETCLASS(c);
|
||||
if (mCurrentState == eStart) {
|
||||
mCurrentBytePos = 0;
|
||||
MOZ_ASSERT(byteCls < mModel->charLenTableLength);
|
||||
mCurrentCharLen = mModel->charLenTable[byteCls];
|
||||
}
|
||||
// from byte's class and stateTable, we get its next state
|
||||
mCurrentState = GETFROMPCK(mCurrentState * mModel->classFactor + byteCls,
|
||||
mModel->stateTable);
|
||||
mCurrentBytePos++;
|
||||
return mCurrentState;
|
||||
}
|
||||
uint32_t GetCurrentCharLen(void) { return mCurrentCharLen; }
|
||||
void Reset(void) { mCurrentState = eStart; }
|
||||
const char* GetCodingStateMachine() { return mModel->name; }
|
||||
|
||||
protected:
|
||||
uint32_t mCurrentState;
|
||||
uint32_t mCurrentCharLen;
|
||||
uint32_t mCurrentBytePos;
|
||||
|
||||
const SMModel* mModel;
|
||||
};
|
||||
|
||||
extern const SMModel UTF8SMModel;
|
||||
extern const SMModel Big5SMModel;
|
||||
extern const SMModel EUCJPSMModel;
|
||||
extern const SMModel EUCKRSMModel;
|
||||
extern const SMModel EUCTWSMModel;
|
||||
extern const SMModel GB18030SMModel;
|
||||
extern const SMModel SJISSMModel;
|
||||
|
||||
extern const SMModel HZSMModel;
|
||||
extern const SMModel ISO2022CNSMModel;
|
||||
extern const SMModel ISO2022JPSMModel;
|
||||
extern const SMModel ISO2022KRSMModel;
|
||||
|
||||
#undef CHAR_LEN_TABLE
|
||||
#ifdef DEBUG
|
||||
# define CHAR_LEN_TABLE(x) x, mozilla::ArrayLength(x)
|
||||
#else
|
||||
# define CHAR_LEN_TABLE(x) x
|
||||
#endif
|
||||
|
||||
#endif /* nsCodingStateMachine_h__ */
|
|
@ -0,0 +1,60 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// for Japanese encodings, observed characteristics:
|
||||
// 1, kana characters (or hankaku?) often have a high frequency of appearance
|
||||
// 2, kana characters often occur in groups
|
||||
// 3, certain combinations of kana are never used in the Japanese language
|
||||
|
||||
#include "nsEUCJPProber.h"
|
||||
#include "nsDebug.h"
|
||||
|
||||
// Returns the prober to the detecting state and clears the coding state
// machine and both analysers.
void nsEUCJPProber::Reset(void) {
  mState = eDetecting;
  mCodingSM->Reset();
  mContextAnalyser.Reset();
  mDistributionAnalyser.Reset();
}
|
||||
|
||||
// Feeds aLen bytes starting at aBuf through the EUC-JP coding state machine
// and hands every completed character to the context and distribution
// analysers. Returns the prober's state after the buffer was processed.
//
// An empty buffer is a caller error (asserted), but it is tolerated: the
// current state is returned unchanged.
nsProbingState nsEUCJPProber::HandleData(const char* aBuf, uint32_t aLen) {
  NS_ASSERTION(aLen, "HandleData called with empty buffer");
  // The assertion does not stop execution. Without this guard the
  // aBuf[aLen - 1] access below would wrap around and read far out of bounds.
  if (aLen == 0) {
    return mState;
  }

  for (uint32_t i = 0; i < aLen; i++) {
    uint32_t codingState = mCodingSM->NextState(aBuf[i]);
    if (codingState == eItsMe) {
      mState = eFoundIt;
      break;
    }
    if (codingState == eStart) {
      // Back in the start state: the byte at i completed a character.
      uint32_t charLen = mCodingSM->GetCurrentCharLen();

      if (i == 0) {
        // The byte before aBuf[0] lives in the previous buffer; it was saved
        // in mLastChar[0], so assemble the pair there.
        mLastChar[1] = aBuf[0];
        mContextAnalyser.HandleOneChar(mLastChar, charLen);
        mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
      } else {
        mContextAnalyser.HandleOneChar(aBuf + i - 1, charLen);
        mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
      }
    }
  }

  // Remember the final byte in case a character straddles into the next
  // buffer.
  mLastChar[0] = aBuf[aLen - 1];

  // Shortcut: declare success once enough data gives a high confidence.
  if (mState == eDetecting) {
    if (mContextAnalyser.GotEnoughData() &&
        GetConfidence() > SHORTCUT_THRESHOLD) {
      mState = eFoundIt;
    }
  }

  return mState;
}
|
||||
|
||||
float nsEUCJPProber::GetConfidence(void) {
|
||||
float contxtCf = mContextAnalyser.GetConfidence();
|
||||
float distribCf = mDistributionAnalyser.GetConfidence();
|
||||
|
||||
return (contxtCf > distribCf ? contxtCf : distribCf);
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// for EUC-JP encoding, observed characteristics:
|
||||
// 1, kana characters (or hankaku?) often have a high frequency of appearance
|
||||
// 2, kana characters often occur in groups
|
||||
// 3, certain combinations of kana are never used in the Japanese language
|
||||
|
||||
#ifndef nsEUCJPProber_h__
|
||||
#define nsEUCJPProber_h__
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
#include "nsCodingStateMachine.h"
|
||||
#include "JpCntx.h"
|
||||
#include "CharDistribution.h"
|
||||
|
||||
class nsEUCJPProber : public nsCharSetProber {
|
||||
public:
|
||||
nsEUCJPProber() {
|
||||
mCodingSM = new nsCodingStateMachine(&EUCJPSMModel);
|
||||
Reset();
|
||||
}
|
||||
virtual ~nsEUCJPProber(void) { delete mCodingSM; }
|
||||
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
|
||||
const char* GetCharSetName() override { return "EUC-JP"; }
|
||||
nsProbingState GetState(void) override { return mState; }
|
||||
void Reset(void) override;
|
||||
float GetConfidence(void) override;
|
||||
|
||||
protected:
|
||||
nsCodingStateMachine* mCodingSM;
|
||||
nsProbingState mState;
|
||||
|
||||
EUCJPContextAnalysis mContextAnalyser;
|
||||
EUCJPDistributionAnalysis mDistributionAnalyser;
|
||||
|
||||
char mLastChar[2];
|
||||
};
|
||||
|
||||
#endif /* nsEUCJPProber_h__ */
|
|
@ -0,0 +1,37 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsEscCharsetProber.h"
|
||||
#include "nsUniversalDetector.h"
|
||||
|
||||
// Creates the prober with an ISO-2022-JP state machine, in the detecting
// state and with no charset detected yet.
nsEscCharSetProber::nsEscCharSetProber()
    : mCodingSM(new nsCodingStateMachine(&ISO2022JPSMModel)),
      mState(eDetecting),
      mDetectedCharset(nullptr) {}
|
||||
|
||||
// Nothing to release by hand; the members clean up after themselves.
nsEscCharSetProber::~nsEscCharSetProber() = default;
|
||||
|
||||
// Forgets any detected charset and restarts detection from scratch.
void nsEscCharSetProber::Reset(void) {
  mDetectedCharset = nullptr;
  mCodingSM->Reset();
  mState = eDetecting;
}
|
||||
|
||||
// Runs the bytes of aBuf through the state machine while the prober is still
// detecting. As soon as the machine reports eItsMe, the charset name is taken
// from the machine and eFoundIt is returned. Otherwise the unchanged state is
// returned once the buffer is exhausted.
nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, uint32_t aLen) {
  uint32_t pos = 0;
  while (pos < aLen && mState == eDetecting) {
    const uint32_t machineState = mCodingSM->NextState(aBuf[pos]);
    ++pos;
    if (machineState != eItsMe) {
      continue;
    }
    mState = eFoundIt;
    mDetectedCharset = mCodingSM->GetCodingStateMachine();
    break;
  }
  return mState;
}
|
|
@ -0,0 +1,31 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsEscCharSetProber_h__
|
||||
#define nsEscCharSetProber_h__
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
#include "nsCodingStateMachine.h"
|
||||
#include "nsAutoPtr.h"
|
||||
|
||||
class nsEscCharSetProber : public nsCharSetProber {
|
||||
public:
|
||||
nsEscCharSetProber();
|
||||
virtual ~nsEscCharSetProber(void);
|
||||
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
|
||||
const char* GetCharSetName() override { return mDetectedCharset; }
|
||||
nsProbingState GetState(void) override { return mState; }
|
||||
void Reset(void) override;
|
||||
float GetConfidence(void) override { return (float)0.99; }
|
||||
|
||||
protected:
|
||||
void GetDistribution(uint32_t aCharLen, const char* aStr);
|
||||
|
||||
nsAutoPtr<nsCodingStateMachine> mCodingSM;
|
||||
nsProbingState mState;
|
||||
const char* mDetectedCharset;
|
||||
};
|
||||
|
||||
#endif /* nsEscCharSetProber_h__ */
|
|
@ -0,0 +1,70 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#include "nsCodingStateMachine.h"
|
||||
|
||||
// Byte -> class table for the ISO-2022-JP state machine. Each word packs the
// classes of eight consecutive byte values, four bits per byte.
//
// Classes assigned below:
//   1: 0x1b (ESC)    7: 0x24 '$'    3: 0x28 '('
//   6: 0x40 '@'      4: 0x42 'B'    8: 0x44 'D'
//   9: 0x49 'I'      5: 0x4a 'J'
//   2: 0x00, 0x0e, 0x0f and every byte >= 0x80
//   0: all remaining bytes
static const uint32_t ISO2022JP_cls[256 / 8] = {
    /* 00 - 07 */ PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0),
    /* 08 - 0f */ PCK4BITS(0, 0, 0, 0, 0, 0, 2, 2),
    /* 10 - 17 */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* 18 - 1f */ PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0),
    /* 20 - 27 */ PCK4BITS(0, 0, 0, 0, 7, 0, 0, 0),
    /* 28 - 2f */ PCK4BITS(3, 0, 0, 0, 0, 0, 0, 0),
    /* 30 - 37 */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* 38 - 3f */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* 40 - 47 */ PCK4BITS(6, 0, 4, 0, 8, 0, 0, 0),
    /* 48 - 4f */ PCK4BITS(0, 9, 5, 0, 0, 0, 0, 0),
    /* 50 - 57 */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* 58 - 5f */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* 60 - 67 */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* 68 - 6f */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* 70 - 77 */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* 78 - 7f */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* 80 - 87 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 88 - 8f */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 90 - 97 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 98 - 9f */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* a0 - a7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* a8 - af */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* b0 - b7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* b8 - bf */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* c0 - c7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* c8 - cf */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* d0 - d7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* d8 - df */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* e0 - e7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* e8 - ef */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* f0 - f7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* f8 - ff */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2)};
|
||||
|
||||
// Packed transition table for the ISO-2022-JP state machine, eight 4-bit
// entries per word. The hex ranges are entry indices.
//
// NOTE(review): the per-state reading below assumes the state machine indexes
// this table as state * 10 + byteClass (10 being the class count given in
// ISO2022JPSMModel) - confirm against nsCodingStateMachine.
//   entries 00-09  eStart : class 1 -> 3, class 2 -> eError, others stay
//   entries 0a-13  eError : stays eError
//   entries 14-1d  eItsMe : stays eItsMe
//   entries 1e-27  state 3: class 3 -> 5, class 7 -> 4
//   entries 28-31  state 4: class 3 -> 6, classes 4 and 6 -> eItsMe
//   entries 32-3b  state 5: classes 4, 5 and 9 -> eItsMe
//   entries 3c-45  state 6: class 8 -> eItsMe
//   entries 46-47  padding
static const uint32_t ISO2022JP_st[9] = {
    /* 00-07 */
    PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart, eStart),
    /* 08-0f */
    PCK4BITS(eStart, eStart, eError, eError, eError, eError, eError, eError),
    /* 10-17 */
    PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe),
    /* 18-1f */
    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eError),
    /* 20-27 */
    PCK4BITS(eError, 5, eError, eError, eError, 4, eError, eError),
    /* 28-2f */
    PCK4BITS(eError, eError, eError, 6, eItsMe, eError, eItsMe, eError),
    /* 30-37 */
    PCK4BITS(eError, eError, eError, eError, eError, eError, eItsMe, eItsMe),
    /* 38-3f */
    PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError),
    /* 40-47 */
    PCK4BITS(eError, eError, eError, eError, eItsMe, eError, eStart, eStart)};
|
||||
|
||||
static const uint32_t ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
const SMModel ISO2022JPSMModel = {
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls},
|
||||
10,
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st},
|
||||
CHAR_LEN_TABLE(ISO2022JPCharLenTable),
|
||||
"ISO-2022-JP",
|
||||
};
|
|
@ -0,0 +1,149 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#include <stdio.h>
|
||||
|
||||
#include "nsMBCSGroupProber.h"
|
||||
#include "nsUniversalDetector.h"
|
||||
|
||||
#if defined(DEBUG_chardet) || defined(DEBUG_jgmyers)
|
||||
// Debug-only labels, indexed like mProbers[] (see the constructor below).
const char* ProberName[] = {"UTF8", "SJIS", "EUCJP"};
|
||||
|
||||
#endif
|
||||
|
||||
// Creates the three member probers (UTF-8, Shift_JIS, EUC-JP, in that slot
// order) and then puts the group into its initial state via Reset().
nsMBCSGroupProber::nsMBCSGroupProber() {
  nsCharSetProber* const created[NUM_OF_PROBERS] = {
      new nsUTF8Prober(),
      new nsSJISProber(),
      new nsEUCJPProber(),
  };
  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot) {
    mProbers[slot] = created[slot];
  }
  Reset();
}
|
||||
|
||||
// Destroys every member prober created by the constructor.
nsMBCSGroupProber::~nsMBCSGroupProber() {
  for (nsCharSetProber* prober : mProbers) {
    delete prober;
  }
}
|
||||
|
||||
const char* nsMBCSGroupProber::GetCharSetName() {
|
||||
if (mBestGuess == -1) {
|
||||
GetConfidence();
|
||||
if (mBestGuess == -1) mBestGuess = 0;
|
||||
}
|
||||
return mProbers[mBestGuess]->GetCharSetName();
|
||||
}
|
||||
|
||||
void nsMBCSGroupProber::Reset(void) {
|
||||
mActiveNum = 0;
|
||||
for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) {
|
||||
if (mProbers[i]) {
|
||||
mProbers[i]->Reset();
|
||||
mIsActive[i] = true;
|
||||
++mActiveNum;
|
||||
} else
|
||||
mIsActive[i] = false;
|
||||
}
|
||||
mBestGuess = -1;
|
||||
mState = eDetecting;
|
||||
mKeepNext = 0;
|
||||
}
|
||||
|
||||
// Filters aBuf down to runs that contain high-bit bytes and forwards each run
// to the active member probers.
//
// A run begins at the first byte with the high bit set and is extended while
// further high-bit bytes keep arriving; it is flushed to the probers once two
// bytes without the high bit have followed the last high-bit byte. A run still
// open at the end of the buffer is flushed as well, and the remaining count is
// carried into the next call through mKeepNext.
//
// Returns eFoundIt as soon as any member prober reports it (that prober
// becomes the best guess); otherwise returns the group's current state.
nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen) {
  // Offers one chunk to every active prober in slot order. Returns true, after
  // recording the winner, when a prober answers eFoundIt.
  auto offerChunk = [this](const char* aChunk, uint32_t aChunkLen) -> bool {
    for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot) {
      if (!mIsActive[slot]) {
        continue;
      }
      if (mProbers[slot]->HandleData(aChunk, aChunkLen) == eFoundIt) {
        mBestGuess = slot;
        mState = eFoundIt;
        return true;
      }
    }
    return false;
  };

  uint32_t chunkStart = 0;
  uint32_t pending = mKeepNext;

  for (uint32_t pos = 0; pos < aLen; ++pos) {
    if (aBuf[pos] & 0x80) {
      if (!pending) {
        chunkStart = pos;
      }
      pending = 2;
      continue;
    }
    if (pending && --pending == 0) {
      if (offerChunk(aBuf + chunkStart, pos + 1 - chunkStart)) {
        return mState;
      }
    }
  }

  // Flush a run that is still open when the buffer ends.
  if (pending && offerChunk(aBuf + chunkStart, aLen - chunkStart)) {
    return mState;
  }

  mKeepNext = pending;
  return mState;
}
|
||||
|
||||
float nsMBCSGroupProber::GetConfidence(void) {
|
||||
uint32_t i;
|
||||
float bestConf = 0.0, cf;
|
||||
|
||||
switch (mState) {
|
||||
case eFoundIt:
|
||||
return (float)0.99;
|
||||
case eNotMe:
|
||||
return (float)0.01;
|
||||
default:
|
||||
for (i = 0; i < NUM_OF_PROBERS; i++) {
|
||||
if (!mIsActive[i]) continue;
|
||||
cf = mProbers[i]->GetConfidence();
|
||||
if (bestConf < cf) {
|
||||
bestConf = cf;
|
||||
mBestGuess = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bestConf;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_chardet
|
||||
void nsMBCSGroupProber::DumpStatus() {
|
||||
uint32_t i;
|
||||
float cf;
|
||||
|
||||
GetConfidence();
|
||||
for (i = 0; i < NUM_OF_PROBERS; i++) {
|
||||
if (!mIsActive[i])
|
||||
printf(" MBCS inactive: [%s] (confidence is too low).\r\n",
|
||||
ProberName[i]);
|
||||
else {
|
||||
cf = mProbers[i]->GetConfidence();
|
||||
printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_jgmyers
|
||||
// Debug helper: writes name, active flag and confidence for each member prober
// into consecutive elements of `states`, starting at `offset`. `offset` is
// advanced by one per prober.
void nsMBCSGroupProber::GetDetectorState(
    nsUniversalDetector::DetectorState (
        &states)[nsUniversalDetector::NumDetectors],
    uint32_t& offset) {
  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot, ++offset) {
    auto& entry = states[offset];
    entry.name = ProberName[slot];
    entry.isActive = mIsActive[slot];
    // Inactive probers are reported with zero confidence.
    entry.confidence = mIsActive[slot] ? mProbers[slot]->GetConfidence() : 0.0;
  }
}
|
||||
#endif /* DEBUG_jgmyers */
|
|
@ -0,0 +1,43 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsMBCSGroupProber_h__
|
||||
#define nsMBCSGroupProber_h__
|
||||
|
||||
#include "nsSJISProber.h"
|
||||
#include "nsUTF8Prober.h"
|
||||
#include "nsEUCJPProber.h"
|
||||
|
||||
#define NUM_OF_PROBERS 3
|
||||
|
||||
class nsMBCSGroupProber : public nsCharSetProber {
|
||||
public:
|
||||
nsMBCSGroupProber();
|
||||
virtual ~nsMBCSGroupProber();
|
||||
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
|
||||
const char* GetCharSetName() override;
|
||||
nsProbingState GetState(void) override { return mState; }
|
||||
void Reset(void) override;
|
||||
float GetConfidence(void) override;
|
||||
|
||||
#ifdef DEBUG_chardet
|
||||
void DumpStatus();
|
||||
#endif
|
||||
#ifdef DEBUG_jgmyers
|
||||
void GetDetectorState(nsUniversalDetector::DetectorState (
|
||||
&states)[nsUniversalDetector::NumDetectors],
|
||||
uint32_t& offset);
|
||||
#endif
|
||||
|
||||
protected:
|
||||
nsProbingState mState;
|
||||
nsCharSetProber* mProbers[NUM_OF_PROBERS];
|
||||
bool mIsActive[NUM_OF_PROBERS];
|
||||
int32_t mBestGuess;
|
||||
uint32_t mActiveNum;
|
||||
uint32_t mKeepNext;
|
||||
};
|
||||
|
||||
#endif /* nsMBCSGroupProber_h__ */
|
|
@ -0,0 +1,200 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#include "nsCodingStateMachine.h"
|
||||
|
||||
/*
Modification from Frank Tang's original work:
. 0x00 is allowed as a legal character, since some web pages contain this
  byte in their text stream.
*/
|
||||
|
||||
// Byte -> class table for the EUC-JP state machine, eight 4-bit classes per
// word.
//
// Classes assigned below:
//   4: 0x00-0x7f except 0x0e, 0x0f and 0x1b
//   5: 0x0e, 0x0f, 0x1b, 0x80-0x8d, 0x90-0xa0 and 0xff
//   1: 0x8e          3: 0x8f
//   2: 0xa1-0xdf     0: 0xe0-0xfe
static const uint32_t EUCJP_cls[256 / 8] = {
    // The first row used to be PCK4BITS(5,4,4,4,4,4,4,4); byte 0x00 is now
    // class 4 so that it is accepted as a legal character.
    /* 00 - 07 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 08 - 0f */ PCK4BITS(4, 4, 4, 4, 4, 4, 5, 5),
    /* 10 - 17 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 18 - 1f */ PCK4BITS(4, 4, 4, 5, 4, 4, 4, 4),
    /* 20 - 27 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 28 - 2f */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 30 - 37 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 38 - 3f */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 40 - 47 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 48 - 4f */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 50 - 57 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 58 - 5f */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 60 - 67 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 68 - 6f */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 70 - 77 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 78 - 7f */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* 80 - 87 */ PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5),
    /* 88 - 8f */ PCK4BITS(5, 5, 5, 5, 5, 5, 1, 3),
    /* 90 - 97 */ PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5),
    /* 98 - 9f */ PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5),
    /* a0 - a7 */ PCK4BITS(5, 2, 2, 2, 2, 2, 2, 2),
    /* a8 - af */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* b0 - b7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* b8 - bf */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* c0 - c7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* c8 - cf */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* d0 - d7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* d8 - df */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* e0 - e7 */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* e8 - ef */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* f0 - f7 */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0),
    /* f8 - ff */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 5)};
|
||||
|
||||
// Packed transition table for the EUC-JP state machine, eight 4-bit entries
// per word. The hex ranges are entry indices.
//
// NOTE(review): presumably indexed as state * 6 + byteClass (6 being the class
// count given in EUCJPSMModel) - confirm against nsCodingStateMachine. Under
// that reading the last four entries are padding.
static const uint32_t EUCJP_st[5] = {
    /* 00-07 */
    PCK4BITS(3, 4, 3, 5, eStart, eError, eError, eError),
    /* 08-0f */
    PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe),
    /* 10-17 */
    PCK4BITS(eItsMe, eItsMe, eStart, eError, eStart, eError, eError, eError),
    /* 18-1f */
    PCK4BITS(eError, eError, eStart, eError, eError, eError, 3, eError),
    /* 20-27 */
    PCK4BITS(3, eError, eError, eError, eStart, eStart, eStart, eStart)};
|
||||
|
||||
static const uint32_t EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
|
||||
|
||||
const SMModel EUCJPSMModel = {
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls},
|
||||
6,
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st},
|
||||
CHAR_LEN_TABLE(EUCJPCharLenTable),
|
||||
"EUC-JP",
|
||||
};
|
||||
|
||||
// sjis
|
||||
|
||||
// Byte -> class table for the Shift_JIS state machine, eight 4-bit classes per
// word.
//
// Classes assigned below:
//   0: 0x0e, 0x0f, 0x1b and 0xfd-0xff
//   1: the remaining bytes below 0x40, plus 0x7f
//   2: 0x40-0x7e and 0xa0-0xdf
//   3: 0x80-0x9f and 0xe0-0xec
//   4: 0xed-0xfc
static const uint32_t SJIS_cls[256 / 8] = {
    // The first row used to be PCK4BITS(0,1,1,1,1,1,1,1); byte 0x00 is now
    // class 1 like its neighbours.
    /* 00 - 07 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 08 - 0f */ PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0),
    /* 10 - 17 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 18 - 1f */ PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1),
    /* 20 - 27 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 28 - 2f */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 30 - 37 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 38 - 3f */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 40 - 47 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 48 - 4f */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 50 - 57 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 58 - 5f */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 60 - 67 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 68 - 6f */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 70 - 77 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 78 - 7f */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 1),
    /* 80 - 87 */ PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3),
    /* 88 - 8f */ PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3),
    /* 90 - 97 */ PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3),
    /* 98 - 9f */ PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3),
    // 0xa0 is illegal in Shift_JIS, but some pages do contain this byte, so
    // the table is deliberately forgiving and gives it class 2.
    /* a0 - a7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* a8 - af */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* b0 - b7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* b8 - bf */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* c0 - c7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* c8 - cf */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* d0 - d7 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* d8 - df */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* e0 - e7 */ PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3),
    /* e8 - ef */ PCK4BITS(3, 3, 3, 3, 3, 4, 4, 4),
    /* f0 - f7 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* f8 - ff */ PCK4BITS(4, 4, 4, 4, 4, 0, 0, 0)};
|
||||
|
||||
// Packed transition table for the Shift_JIS state machine, eight 4-bit entries
// per word. The hex ranges are entry indices.
//
// NOTE(review): presumably indexed as state * 6 + byteClass (6 being the class
// count given in SJISSMModel) - confirm against nsCodingStateMachine.
static const uint32_t SJIS_st[3] = {
    /* 00-07 */
    PCK4BITS(eError, eStart, eStart, 3, eError, eError, eError, eError),
    /* 08-0f */
    PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe),
    /* 10-17 */
    PCK4BITS(eItsMe, eItsMe, eError, eError, eStart, eStart, eStart, eStart)};
|
||||
|
||||
static const uint32_t SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
|
||||
|
||||
const SMModel SJISSMModel = {
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls},
|
||||
6,
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st},
|
||||
CHAR_LEN_TABLE(SJISCharLenTable),
|
||||
"Shift_JIS",
|
||||
};
|
||||
|
||||
// Byte -> class table for the UTF-8 state machine, eight 4-bit classes per
// word.
//
// Classes assigned below:
//    0: 0x0e, 0x0f, 0x1b, 0xc0, 0xc1 and 0xf5-0xff
//    1: the remaining bytes below 0x80
//    2: 0x80-0x8f     3: 0x90-0x9f     4: 0xa0-0xbf
//    5: 0xc2-0xdf
//    6: 0xe0          7: 0xe1-0xec, 0xee, 0xef     8: 0xed
//    9: 0xf0         10: 0xf1-0xf3                11: 0xf4
static const uint32_t UTF8_cls[256 / 8] = {
    /* 00 - 07 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 08 - 0f */ PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0),
    /* 10 - 17 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 18 - 1f */ PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1),
    /* 20 - 27 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 28 - 2f */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 30 - 37 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 38 - 3f */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 40 - 47 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 48 - 4f */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 50 - 57 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 58 - 5f */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 60 - 67 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 68 - 6f */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 70 - 77 */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 78 - 7f */ PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1),
    /* 80 - 87 */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 88 - 8f */ PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2),
    /* 90 - 97 */ PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3),
    /* 98 - 9f */ PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3),
    /* a0 - a7 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* a8 - af */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* b0 - b7 */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* b8 - bf */ PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4),
    /* c0 - c7 */ PCK4BITS(0, 0, 5, 5, 5, 5, 5, 5),
    /* c8 - cf */ PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5),
    /* d0 - d7 */ PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5),
    /* d8 - df */ PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5),
    /* e0 - e7 */ PCK4BITS(6, 7, 7, 7, 7, 7, 7, 7),
    /* e8 - ef */ PCK4BITS(7, 7, 7, 7, 7, 8, 7, 7),
    /* f0 - f7 */ PCK4BITS(9, 10, 10, 10, 11, 0, 0, 0),
    /* f8 - ff */ PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0)};
|
||||
|
||||
// Packed transition table for the UTF-8 state machine, eight 4-bit entries per
// word. The hex ranges are entry indices.
//
// NOTE(review): the per-state reading below assumes the state machine indexes
// this table as state * 12 + byteClass (12 being the class count given in
// UTF8SMModel) - confirm against nsCodingStateMachine. Byte classes are the
// ones assigned by UTF8_cls.
//   entries 00-0b  eStart : class 1 stays; classes 5..11 -> states 3..9
//   entries 0c-17  eError : stays eError
//   entries 18-23  eItsMe : stays eItsMe
//   entries 24-2f  state 3: one trailing byte left, classes 2-4 -> eStart
//   entries 30-3b  state 4: after 0xe0, class 4 (0xa0-0xbf) -> 3
//   entries 3c-47  state 5: two trailing bytes left, classes 2-4 -> 3
//   entries 48-53  state 6: after 0xed, classes 2-3 (0x80-0x9f) -> 3
//   entries 54-5f  state 7: after 0xf0, classes 3-4 (0x90-0xbf) -> 5
//   entries 60-6b  state 8: after 0xf1-0xf3, classes 2-4 -> 5
//   entries 6c-77  state 9: after 0xf4, class 2 (0x80-0x8f) -> 5
//
// Fix: the state-7 row previously accepted classes 2-3 (entries 0x56/0x57),
// i.e. 0x80-0x9f. That let the overlong forms F0 80..8F through and rejected
// the valid second bytes 0xa0-0xbf (code points U+20000 and above). UTF-8
// requires 0x90-0xbf after 0xf0, so the accepting entries are now 0x57/0x58.
static const uint32_t UTF8_st[15] = {
    /* 00 - 07 */
    PCK4BITS(eError, eStart, eError, eError, eError, 3, 4, 5),
    /* 08 - 0f */
    PCK4BITS(6, 7, 8, 9, eError, eError, eError, eError),
    /* 10 - 17 */
    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError),
    /* 18 - 1f */
    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe),
    /* 20 - 27 */
    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart, eStart),
    /* 28 - 2f */
    PCK4BITS(eStart, eError, eError, eError, eError, eError, eError, eError),
    /* 30 - 37 */
    PCK4BITS(eError, eError, eError, eError, 3, eError, eError, eError),
    /* 38 - 3f */
    PCK4BITS(eError, eError, eError, eError, eError, eError, 3, 3),
    /* 40 - 47 */
    PCK4BITS(3, eError, eError, eError, eError, eError, eError, eError),
    /* 48 - 4f */
    PCK4BITS(eError, eError, 3, 3, eError, eError, eError, eError),
    /* 50 - 57 */
    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, 5),
    /* 58 - 5f */
    PCK4BITS(5, eError, eError, eError, eError, eError, eError, eError),
    /* 60 - 67 */
    PCK4BITS(eError, eError, 5, 5, 5, eError, eError, eError),
    /* 68 - 6f */
    PCK4BITS(eError, eError, eError, eError, eError, eError, 5, eError),
    /* 70 - 77 */
    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError)};
|
||||
|
||||
static const uint32_t UTF8CharLenTable[] = {0, 1, 0, 0, 0, 2, 3, 3, 3, 4, 4, 4};
|
||||
|
||||
const SMModel UTF8SMModel = {
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls},
|
||||
12,
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st},
|
||||
CHAR_LEN_TABLE(UTF8CharLenTable),
|
||||
"UTF-8",
|
||||
};
|
|
@ -0,0 +1,43 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsPkgInt_h__
|
||||
#define nsPkgInt_h__
|
||||
#include "nscore.h"
|
||||
|
||||
// Parameters used by GETFROMPCK to locate one packed unit inside an array of
// 32-bit words. Each enum offers a value for 4-, 8- and 16-bit units.

// Right shift that turns a unit index into the index of its word.
typedef enum {
  eIdxSft4bits = 3,
  eIdxSft8bits = 2,
  eIdxSft16bits = 1
} nsIdxSft;

// Mask that extracts the unit's position within its word.
typedef enum {
  eSftMsk4bits = 7,
  eSftMsk8bits = 3,
  eSftMsk16bits = 1
} nsSftMsk;

// Left shift that converts the position within the word into a bit offset.
typedef enum {
  eBitSft4bits = 2,
  eBitSft8bits = 3,
  eBitSft16bits = 4
} nsBitSft;

// Mask covering exactly one unit.
typedef enum {
  eUnitMsk4bits = 0x0000000FL,
  eUnitMsk8bits = 0x000000FFL,
  eUnitMsk16bits = 0x0000FFFFL
} nsUnitMsk;
|
||||
|
||||
typedef struct nsPkgInt {
|
||||
nsIdxSft idxsft;
|
||||
nsSftMsk sftmsk;
|
||||
nsBitSft bitsft;
|
||||
nsUnitMsk unitmsk;
|
||||
const uint32_t* const data;
|
||||
} nsPkgInt;
|
||||
|
||||
// Packs two 16-bit values into one uint32_t; `a` lands in the low half.
#define PCK16BITS(a, b) ((uint32_t)((a) | ((b) << 16)))

// Packs four 8-bit values into one uint32_t; `a` is the lowest byte.
#define PCK8BITS(a, b, c, d)                \
  PCK16BITS(((uint32_t)((a) | ((b) << 8))), \
            ((uint32_t)((c) | ((d) << 8))))

// Packs eight 4-bit values into one uint32_t; `a` is the lowest nibble.
#define PCK4BITS(a, b, c, d, e, f, g, h)    \
  PCK8BITS(((uint32_t)((a) | ((b) << 4))),  \
           ((uint32_t)((c) | ((d) << 4))),  \
           ((uint32_t)((e) | ((f) << 4))),  \
           ((uint32_t)((g) | ((h) << 4))))

// Reads unit number `i` from the packed array described by `c`: select the
// word, shift the wanted unit down to bit 0 and mask it out.
#define GETFROMPCK(i, c)                                                   \
  ((((c).data[(i) >> (c).idxsft]) >> (((i) & (c).sftmsk) << (c).bitsft)) & \
   (c).unitmsk)
|
||||
|
||||
#endif /* nsPkgInt_h__ */
|
|
@ -0,0 +1,59 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// For Shift_JIS encoding, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often occur in groups
// 3. certain combinations of kana are never used in the Japanese language
|
||||
|
||||
#include "nsSJISProber.h"
|
||||
#include "nsDebug.h"
|
||||
|
||||
// Restarts detection: resets both analysers and the state machine and returns
// to the detecting state.
void nsSJISProber::Reset() {
  mState = eDetecting;
  mCodingSM->Reset();
  mDistributionAnalyser.Reset();
  mContextAnalyser.Reset();
}
|
||||
|
||||
// Runs aLen bytes from aBuf through the Shift_JIS state machine and feeds each
// completed character to the context and distribution analysers.
//
// Returns eFoundIt when the state machine reports eItsMe, or when the context
// analyser has enough data and the confidence exceeds SHORTCUT_THRESHOLD;
// otherwise returns the current state.
//
// An empty buffer is asserted against and otherwise ignored. Without the
// explicit guard, the `aBuf[aLen - 1]` read further down would index far
// outside the buffer whenever the assertion does not stop execution.
nsProbingState nsSJISProber::HandleData(const char* aBuf, uint32_t aLen) {
  NS_ASSERTION(aLen, "HandleData called with empty buffer");
  if (aLen == 0) {
    return mState;
  }

  uint32_t codingState;

  for (uint32_t i = 0; i < aLen; i++) {
    codingState = mCodingSM->NextState(aBuf[i]);
    if (codingState == eItsMe) {
      mState = eFoundIt;
      break;
    }
    if (codingState == eStart) {
      // A character has just been completed.
      uint32_t charLen = mCodingSM->GetCurrentCharLen();
      if (i == 0) {
        // The character may have started in the previous buffer: stitch it
        // together from the saved last byte and the first byte of this one.
        mLastChar[1] = aBuf[0];
        mContextAnalyser.HandleOneChar(mLastChar + 2 - charLen, charLen);
        mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
      } else {
        mContextAnalyser.HandleOneChar(aBuf + i + 1 - charLen, charLen);
        mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
      }
    }
  }

  // Remember the final byte for the stitching done on the next call.
  mLastChar[0] = aBuf[aLen - 1];

  if (mState == eDetecting) {
    if (mContextAnalyser.GotEnoughData() &&
        GetConfidence() > SHORTCUT_THRESHOLD) {
      mState = eFoundIt;
    }
  }

  return mState;
}
|
||||
|
||||
float nsSJISProber::GetConfidence(void) {
|
||||
float contxtCf = mContextAnalyser.GetConfidence();
|
||||
float distribCf = mDistributionAnalyser.GetConfidence();
|
||||
|
||||
return (contxtCf > distribCf ? contxtCf : distribCf);
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// For Shift_JIS encoding, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often occur in groups
// 3. certain combinations of kana are never used in the Japanese language
|
||||
|
||||
#ifndef nsSJISProber_h__
|
||||
#define nsSJISProber_h__
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
#include "nsCodingStateMachine.h"
|
||||
#include "JpCntx.h"
|
||||
#include "CharDistribution.h"
|
||||
|
||||
class nsSJISProber : public nsCharSetProber {
|
||||
public:
|
||||
nsSJISProber() {
|
||||
mCodingSM = new nsCodingStateMachine(&SJISSMModel);
|
||||
Reset();
|
||||
}
|
||||
virtual ~nsSJISProber(void) { delete mCodingSM; }
|
||||
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
|
||||
const char* GetCharSetName() override { return "Shift_JIS"; }
|
||||
nsProbingState GetState(void) override { return mState; }
|
||||
void Reset(void) override;
|
||||
float GetConfidence(void) override;
|
||||
|
||||
protected:
|
||||
nsCodingStateMachine* mCodingSM;
|
||||
nsProbingState mState;
|
||||
|
||||
SJISContextAnalysis mContextAnalyser;
|
||||
SJISDistributionAnalysis mDistributionAnalyser;
|
||||
|
||||
char mLastChar[2];
|
||||
};
|
||||
|
||||
#endif /* nsSJISProber_h__ */
|
|
@ -0,0 +1,43 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsUTF8Prober.h"
|
||||
|
||||
// Restarts detection: rewinds the state machine, clears the multi-byte
// character count and returns to the detecting state.
void nsUTF8Prober::Reset() {
  mState = eDetecting;
  mNumOfMBChar = 0;
  mCodingSM->Reset();
}
|
||||
|
||||
// Runs aLen bytes from aBuf through the UTF-8 state machine, counting every
// completed character that is at least two bytes long.
//
// Returns eFoundIt when the state machine reports eItsMe or when the
// confidence exceeds SHORTCUT_THRESHOLD; otherwise returns the current state.
nsProbingState nsUTF8Prober::HandleData(const char* aBuf, uint32_t aLen) {
  for (uint32_t pos = 0; pos < aLen; ++pos) {
    const uint32_t smState = mCodingSM->NextState(aBuf[pos]);
    if (smState == eItsMe) {
      mState = eFoundIt;
      break;
    }
    // Back at eStart means a character was just completed.
    if (smState == eStart && mCodingSM->GetCurrentCharLen() >= 2) {
      mNumOfMBChar++;
    }
  }

  if (mState == eDetecting && GetConfidence() > SHORTCUT_THRESHOLD) {
    mState = eFoundIt;
  }
  return mState;
}
|
||||
|
||||
#define ONE_CHAR_PROB (float)0.50
|
||||
|
||||
float nsUTF8Prober::GetConfidence(void) {
|
||||
float unlike = (float)0.99;
|
||||
|
||||
if (mNumOfMBChar < 6) {
|
||||
for (uint32_t i = 0; i < mNumOfMBChar; i++) unlike *= ONE_CHAR_PROB;
|
||||
return (float)1.0 - unlike;
|
||||
} else
|
||||
return (float)0.99;
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsUTF8Prober_h__
|
||||
#define nsUTF8Prober_h__
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
#include "nsCodingStateMachine.h"
|
||||
|
||||
class nsUTF8Prober : public nsCharSetProber {
|
||||
public:
|
||||
nsUTF8Prober() {
|
||||
mNumOfMBChar = 0;
|
||||
mCodingSM = new nsCodingStateMachine(&UTF8SMModel);
|
||||
Reset();
|
||||
}
|
||||
virtual ~nsUTF8Prober() { delete mCodingSM; }
|
||||
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
|
||||
const char* GetCharSetName() override { return "UTF-8"; }
|
||||
nsProbingState GetState(void) override { return mState; }
|
||||
void Reset(void) override;
|
||||
float GetConfidence(void) override;
|
||||
|
||||
protected:
|
||||
nsCodingStateMachine* mCodingSM;
|
||||
nsProbingState mState;
|
||||
uint32_t mNumOfMBChar;
|
||||
};
|
||||
|
||||
#endif /* nsUTF8Prober_h__ */
|
|
@ -0,0 +1,179 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
#include "nsUniversalDetector.h"
|
||||
|
||||
#include "nsMBCSGroupProber.h"
|
||||
#include "nsEscCharsetProber.h"
|
||||
|
||||
// Puts the detector into its initial state: nothing seen, nothing detected,
// and no probers allocated yet (they are created on demand by HandleData).
//
// mLanguageFilter is zeroed here only so that no member is left
// indeterminate; nothing in this file assigns or reads it.
// NOTE(review): confirm 0 is an acceptable value for any subclass that uses it.
nsUniversalDetector::nsUniversalDetector()
    : mInputState(ePureAscii),
      mDone(false),
      mInTag(false),
      mStart(true),
      mGotData(false),
      mLastChar('\0'),
      mDetectedCharset(nullptr),
      mBestGuess(-1),  // illegal value as signal
      mLanguageFilter(0),
      mMultibyteProber(nullptr),
      mEscCharSetProber(nullptr) {}
|
||||
|
||||
// Releases whichever probers HandleData() allocated; deleting a null pointer
// is a no-op, so no checks are needed.
nsUniversalDetector::~nsUniversalDetector() {
  delete mMultibyteProber;
  delete mEscCharSetProber;
}
|
||||
|
||||
void nsUniversalDetector::Reset() {
|
||||
mDone = false;
|
||||
mBestGuess = -1; // illegal value as signal
|
||||
mInTag = false;
|
||||
|
||||
mStart = true;
|
||||
mDetectedCharset = nullptr;
|
||||
mGotData = false;
|
||||
mInputState = ePureAscii;
|
||||
mLastChar = '\0';
|
||||
|
||||
if (mMultibyteProber) {
|
||||
mMultibyteProber->Reset();
|
||||
}
|
||||
|
||||
if (mEscCharSetProber) {
|
||||
mEscCharSetProber->Reset();
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
#define SHORTCUT_THRESHOLD (float)0.95
|
||||
#define MINIMUM_THRESHOLD (float)0.20
|
||||
|
||||
// Returns the charset announced by a byte-order mark at the start of aBuf, or
// nullptr when the buffer does not begin with one of the recognized marks.
static const char* DetectBOMCharset(const char* aBuf, uint32_t aLen) {
  if (aLen < 2) {
    return nullptr;
  }
  switch (aBuf[0]) {
    case '\xEF':
      // EF BB BF: UTF-8 encoded BOM (needs a third byte).
      if (aLen > 2 && aBuf[1] == '\xBB' && aBuf[2] == '\xBF') {
        return "UTF-8";
      }
      break;
    case '\xFE':
      // FE FF: UTF-16, big endian BOM.
      if (aBuf[1] == '\xFF') {
        return "UTF-16BE";
      }
      break;
    case '\xFF':
      // FF FE: UTF-16, little endian BOM.
      if (aBuf[1] == '\xFE') {
        return "UTF-16LE";
      }
      break;
  }
  return nullptr;
}

// Feeds one buffer of input to the detector. Always returns NS_OK.
//
// The first buffer is checked for a byte-order mark. After that, every byte is
// scanned to classify the input as pure ASCII, ASCII with an escape character,
// or containing high-bit bytes, and the buffer is passed to the prober that
// matches the resulting classification. Once a prober reports eFoundIt the
// detector is done and further calls return immediately.
//
// Compared with the previous version, the null check that followed
// `new nsEscCharSetProber()` is gone: a plain `new` expression does not yield
// nullptr, so that branch could never be taken.
nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen) {
  if (mDone) {
    return NS_OK;
  }

  if (aLen > 0) {
    mGotData = true;
  }

  // If the data starts with a BOM, we know it is UTF.
  if (mStart) {
    mStart = false;
    if (const char* bomCharset = DetectBOMCharset(aBuf, aLen)) {
      mDetectedCharset = bomCharset;
    }
    if (mDetectedCharset) {
      mDone = true;
      return NS_OK;
    }
  }

  for (uint32_t i = 0; i < aLen; i++) {
    // A byte with the high bit set counts as non-ASCII, except 0xA0: many
    // otherwise ASCII-only pages contain NBSP.
    const bool isHighByte = (aBuf[i] & '\x80') && aBuf[i] != '\xA0';
    if (!isHighByte) {
      // Only pure ASCII so far; an ESC switches to escape-sequence probing.
      if (ePureAscii == mInputState && aBuf[i] == '\033') {
        mInputState = eEscAscii;
      }
      mLastChar = aBuf[i];
      continue;
    }

    if (mInputState != eHighbyte) {
      mInputState = eHighbyte;

      // The escape prober is no longer relevant once a high byte shows up.
      delete mEscCharSetProber;
      mEscCharSetProber = nullptr;

      // Start the multibyte charset prober.
      if (!mMultibyteProber) {
        mMultibyteProber = new nsMBCSGroupProber();
      }
    }
  }

  switch (mInputState) {
    case eEscAscii:
      if (!mEscCharSetProber) {
        mEscCharSetProber = new nsEscCharSetProber();
      }
      if (mEscCharSetProber->HandleData(aBuf, aLen) == eFoundIt) {
        mDone = true;
        mDetectedCharset = mEscCharSetProber->GetCharSetName();
      }
      break;

    case eHighbyte:
      if (mMultibyteProber->HandleData(aBuf, aLen) == eFoundIt) {
        mDone = true;
        mDetectedCharset = mMultibyteProber->GetCharSetName();
      }
      break;

    default:  // pure ASCII: nothing to probe
      break;
  }

  return NS_OK;
}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
void nsUniversalDetector::DataEnd() {
|
||||
// Callers sometimes signal end-of-data before feeding a single byte; in
// that case there is nothing to conclude.
if (!mGotData) {
|
||||
return;
|
||||
}
|
||||
|
||||
// A charset was already pinned down while data was streaming in.
if (mDetectedCharset) {
|
||||
mDone = true;
|
||||
Report(mDetectedCharset);
|
||||
return;
|
||||
}
|
||||
|
||||
// Every input state other than eHighbyte ends without a report.
if (mInputState != eHighbyte) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Below the confidence threshold we stay silent, which is in effect a
// negative answer.
if (mMultibyteProber->GetConfidence() > MINIMUM_THRESHOLD) {
|
||||
Report(mMultibyteProber->GetCharSetName());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsUniversalDetector_h__
|
||||
#define nsUniversalDetector_h__
|
||||
|
||||
class nsCharSetProber;
|
||||
|
||||
typedef enum { ePureAscii = 0, eEscAscii = 1, eHighbyte = 2 } nsInputState;
|
||||
|
||||
// Base class for a charset detector. HandleData() tracks an input state and
// drives the two probers declared below; any result is handed to Report(),
// which subclasses must implement.
class nsUniversalDetector {
|
||||
public:
|
||||
nsUniversalDetector();
|
||||
virtual ~nsUniversalDetector();
|
||||
// Feeds aLen bytes starting at aBuf to the detector.
virtual nsresult HandleData(const char* aBuf, uint32_t aLen);
|
||||
// Signals end of input. Reports mDetectedCharset if set; otherwise, in the
// eHighbyte state, reports the multibyte prober's charset when its
// confidence exceeds MINIMUM_THRESHOLD.
virtual void DataEnd(void);
|
||||
|
||||
protected:
|
||||
// Receives the name of the detected charset.
virtual void Report(const char* aCharset) = 0;
|
||||
virtual void Reset();
|
||||
// One of ePureAscii, eEscAscii or eHighbyte.
nsInputState mInputState;
|
||||
// Set to true once a prober returns eFoundIt, and by DataEnd() when a
// charset has been detected.
bool mDone;
|
||||
bool mInTag;
|
||||
bool mStart;
|
||||
// DataEnd() returns immediately while this is false.
bool mGotData;
|
||||
// Updated from the input buffer on the non-high-byte path of HandleData().
char mLastChar;
|
||||
// Charset name obtained from a prober's GetCharSetName(); null until then.
const char* mDetectedCharset;
|
||||
int32_t mBestGuess;
|
||||
uint32_t mLanguageFilter;
|
||||
|
||||
// Created lazily (as nsMBCSGroupProber) when HandleData() enters eHighbyte.
nsCharSetProber* mMultibyteProber;
|
||||
// Created lazily (as nsEscCharSetProber) in the eEscAscii state; deleted
// when HandleData() switches to eHighbyte.
nsCharSetProber* mEscCharSetProber;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,8 @@
|
|||
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
||||
# vim: set filetype=python:
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
# Descend into the 'base' and 'xpcom' subdirectories.
DIRS += ['base', 'xpcom']
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
||||
# vim: set filetype=python:
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
# The only source compiled from this directory.
UNIFIED_SOURCES += [
|
||||
'nsUdetXPCOMWrapper.cpp',
|
||||
]
|
||||
|
||||
# The objects built here go into the 'xul' library.
FINAL_LIBRARY = 'xul'
|
||||
|
||||
# Lets the source above include headers that live in ../base.
LOCAL_INCLUDES += [
|
||||
'../base',
|
||||
]
|
|
@ -0,0 +1,75 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
#include "nsUniversalDetector.h"
|
||||
#include "nsUdetXPCOMWrapper.h"
|
||||
#include "nsCharSetProber.h" // for DumpStatus
|
||||
|
||||
#include "nsUniversalCharDetDll.h"
|
||||
//---- for XPCOM
|
||||
#include "nsIFactory.h"
|
||||
#include "nsISupports.h"
|
||||
#include "nsCOMPtr.h"
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
// Nothing to set up here; the nsUniversalDetector base is default-constructed.
nsXPCOMDetector::nsXPCOMDetector() = default;
|
||||
//---------------------------------------------------------------------
|
||||
// No explicit teardown beyond what the members and the base class do.
nsXPCOMDetector::~nsXPCOMDetector() = default;
|
||||
//---------------------------------------------------------------------
|
||||
|
||||
// nsISupports implementation exposing the nsICharsetDetector interface.
NS_IMPL_ISUPPORTS(nsXPCOMDetector, nsICharsetDetector)
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
// Registers the observer that will be notified of detection results.
// Returns NS_ERROR_ILLEGAL_VALUE when aObserver is null, NS_OK otherwise.
NS_IMETHODIMP nsXPCOMDetector::Init(nsICharsetDetectionObserver* aObserver) {
|
||||
NS_ASSERTION(mObserver == nullptr, "Init twice");
|
||||
// Report() forwards results to the observer, so a null one is rejected.
if (!aObserver) {
|
||||
return NS_ERROR_ILLEGAL_VALUE;
|
||||
}
|
||||
|
||||
mObserver = aObserver;
|
||||
return NS_OK;
|
||||
}
|
||||
//----------------------------------------------------------
|
||||
// Runs the detector over one buffer.
//
// aBuf / aLen   - the bytes to examine.
// oDontFeedMe   - out: set to true when detection has finished (mDone) and
//                 the caller need not supply further data, false otherwise.
//
// Returns NS_ERROR_ILLEGAL_VALUE if aBuf or oDontFeedMe is null, the failure
// code from HandleData() if it fails, and NS_OK otherwise.
NS_IMETHODIMP nsXPCOMDetector::DoIt(const char* aBuf, uint32_t aLen,
|
||||
bool* oDontFeedMe) {
|
||||
NS_ASSERTION(mObserver != nullptr, "have not init yet");
|
||||
|
||||
if ((nullptr == aBuf) || (nullptr == oDontFeedMe))
|
||||
return NS_ERROR_ILLEGAL_VALUE;
|
||||
|
||||
// Give the out-parameter a defined value up front so that it is valid even
// when HandleData() fails below; callers may inspect it before the result.
*oDontFeedMe = false;
|
||||
|
||||
this->Reset();
|
||||
nsresult rv = this->HandleData(aBuf, aLen);
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
|
||||
if (mDone) {
|
||||
if (mDetectedCharset) Report(mDetectedCharset);
|
||||
|
||||
// Previously this assignment was immediately overwritten with false, so
// the caller was never told to stop feeding.
*oDontFeedMe = true;
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
//----------------------------------------------------------
|
||||
// Signals end of input by delegating to DataEnd(). Always returns NS_OK.
NS_IMETHODIMP nsXPCOMDetector::Done() {
|
||||
NS_ASSERTION(mObserver != nullptr, "have not init yet");
|
||||
// NOTE(review): the debug-only section below uses mCharSetProbers and
// NUM_OF_CHARSET_PROBERS, which the nsUniversalDetector declaration shown in
// this change does not contain - confirm it still builds with DEBUG_chardet.
#ifdef DEBUG_chardet
|
||||
for (int32_t i = 0; i < NUM_OF_CHARSET_PROBERS; i++) {
|
||||
// If no data was received the array might stay filled with nulls
|
||||
// the way it was initialized in the constructor.
|
||||
if (mCharSetProbers[i]) mCharSetProbers[i]->DumpStatus();
|
||||
}
|
||||
#endif
|
||||
|
||||
this->DataEnd();
|
||||
return NS_OK;
|
||||
}
|
||||
//----------------------------------------------------------
|
||||
// Forwards the detected charset name to the observer registered via Init(),
// tagged as eBestAnswer.
void nsXPCOMDetector::Report(const char* aCharset) {
|
||||
// Debug-only check; mObserver is dereferenced below regardless.
NS_ASSERTION(mObserver != nullptr, "have not init yet");
|
||||
#ifdef DEBUG_chardet
|
||||
printf("Universal Charset Detector report charset %s . \r\n", aCharset);
|
||||
#endif
|
||||
mObserver->Notify(aCharset, eBestAnswer);
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef _nsUdetXPCOMWrapper_h__
|
||||
#define _nsUdetXPCOMWrapper_h__
|
||||
#include "nsISupports.h"
|
||||
#include "nsICharsetDetector.h"
|
||||
#include "nsIStringCharsetDetector.h"
|
||||
#include "nsICharsetDetectionObserver.h"
|
||||
#include "nsCOMPtr.h"
|
||||
#include "nsIFactory.h"
|
||||
#include "nsUniversalDetector.h"
|
||||
|
||||
//=====================================================================
|
||||
// XPCOM-facing wrapper: combines the nsUniversalDetector logic with the
// nsICharsetDetector interface and relays results to an observer.
class nsXPCOMDetector : public nsUniversalDetector, public nsICharsetDetector {
|
||||
NS_DECL_ISUPPORTS
|
||||
public:
|
||||
nsXPCOMDetector();
|
||||
// Stores the observer; rejects null with NS_ERROR_ILLEGAL_VALUE.
NS_IMETHOD Init(nsICharsetDetectionObserver* aObserver) override;
|
||||
// Resets the detector and runs it over [aBuf, aBuf + aLen).
NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen, bool* oDontFeedMe) override;
|
||||
// Signals end of input (calls DataEnd()).
NS_IMETHOD Done() override;
|
||||
|
||||
protected:
|
||||
virtual ~nsXPCOMDetector();
|
||||
// Notifies mObserver of the charset with eBestAnswer.
virtual void Report(const char* aCharset) override;
|
||||
|
||||
private:
|
||||
// Set by Init(); receives Notify() calls from Report().
nsCOMPtr<nsICharsetDetectionObserver> mObserver;
|
||||
};
|
||||
|
||||
//=====================================================================
|
||||
|
||||
// Concrete, final detector type. It adds no members or overrides of its own;
// all behavior comes from nsXPCOMDetector.
class nsJAPSMDetector final : public nsXPCOMDetector {
|
||||
public:
|
||||
nsJAPSMDetector() : nsXPCOMDetector() {}
|
||||
};
|
||||
|
||||
#endif //_nsUdetXPCOMWrapper_h__
|
|
@ -0,0 +1,11 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsCharDetDll_h__
|
||||
#define nsCharDetDll_h__
|
||||
|
||||
#include "prtypes.h"
|
||||
|
||||
#endif /* nsCharDetDll_h__ */
|
|
@ -252,14 +252,6 @@ class Encoding final {
|
|||
return encoding_is_ascii_compatible(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether this is a Japanese legacy encoding.
|
||||
*/
|
||||
inline bool IsJapaneseLegacy() const {
|
||||
return this == SHIFT_JIS_ENCODING || this == EUC_JP_ENCODING ||
|
||||
this == ISO_2022_JP_ENCODING;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the _output encoding_ of this encoding. This is UTF-8 for
|
||||
* UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise.
|
||||
|
|
|
@ -1,124 +0,0 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// Mostly copied and pasted from
|
||||
// third_party/rust/shift_or_euc/src/lib.rs , so
|
||||
// "top-level directory of this distribution" above refers to
|
||||
// third_party/rust/shift_or_euc/
|
||||
|
||||
#ifndef mozilla_JapaneseDetector_h
|
||||
#define mozilla_JapaneseDetector_h
|
||||
|
||||
#include "mozilla/Encoding.h"
|
||||
|
||||
namespace mozilla {
|
||||
class JapaneseDetector;
|
||||
}; // namespace mozilla
|
||||
|
||||
#define SHIFT_OR_EUC_DETECTOR mozilla::JapaneseDetector
|
||||
|
||||
#include "shift_or_euc.h"
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
/**
|
||||
* A Japanese legacy encoding detector for detecting between Shift_JIS,
|
||||
* EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the
|
||||
* encoding is one of those.
|
||||
*
|
||||
* # Principle of Operation
|
||||
*
|
||||
* The detector is based on two observations:
|
||||
*
|
||||
* 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
|
||||
* EUC-JP, so encountering such an escape sequence (before non-ASCII has been
|
||||
* encountered) can be taken as indication of ISO-2022-JP.
|
||||
* 2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
|
||||
* decoded as EUC-JP, or vice versa, the result is either an error or
|
||||
* half-width katakana, and it's very uncommon for Japanese HTML to have
|
||||
* half-width katakana character before a normal kana or common kanji
|
||||
* character. Therefore, if decoding as Shift_JIS results in error or
|
||||
* half-width katakana, the detector decides that the content is EUC-JP, and
|
||||
* vice versa.
|
||||
*
|
||||
* # Failure Modes
|
||||
*
|
||||
* The detector gives the wrong answer if the text has a half-width katakana
|
||||
* character before normal kana or common kanji. Some uncommon kanji are
|
||||
* undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
|
||||
*
|
||||
* The half-width katakana issue is mainly relevant for old 8-bit JIS X
|
||||
* 0201-only text files that would decode correctly as Shift_JIS but that the
|
||||
* detector detects as EUC-JP.
|
||||
*
|
||||
* The undecidable kanji issue does not realistically show up when a full
|
||||
* document is fed to the detector, because, realistically, in a full
|
||||
* document, there is at least one kana or common kanji. It can occur,
|
||||
* though, if the detector is only run on a prefix of a document and the
|
||||
* prefix only contains the title of the document. It is possible for
|
||||
* document title to consist entirely of undecidable kanji. (Indeed,
|
||||
* Japanese Wikipedia has articles with such titles.) If the detector is
|
||||
* undecided, a fallback to Shift_JIS should be used.
|
||||
*/
|
||||
// C++ facade over the shift_or_euc_detector_* functions declared in
// shift_or_euc.h; every member simply forwards to one of them.
class JapaneseDetector final {
|
||||
public:
|
||||
~JapaneseDetector() {}
|
||||
|
||||
// Deallocation is routed to shift_or_euc_detector_free(), pairing with the
// shift_or_euc_detector_new() call made in Create().
static void operator delete(void* aDetector) {
|
||||
shift_or_euc_detector_free(reinterpret_cast<JapaneseDetector*>(aDetector));
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates the detector. If `aAllow2022` is `true` the possible
|
||||
* guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
* `aAllow2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
* and undecided.
|
||||
*/
|
||||
static inline UniquePtr<JapaneseDetector> Create(bool aAllow2022) {
|
||||
UniquePtr<JapaneseDetector> detector(shift_or_euc_detector_new(aAllow2022));
|
||||
return detector;
|
||||
}
|
||||
|
||||
/**
|
||||
* Feeds bytes to the detector. If `aLast` is `true` the end of the stream
|
||||
* is considered to occur immediately after the end of `aBuffer`.
|
||||
* Otherwise, the stream is expected to continue. `aBuffer` may be empty.
|
||||
*
|
||||
* If you're running the detector only on a prefix of a complete
|
||||
* document, _do not_ pass `aLast` as `true` after the prefix if the
|
||||
* stream as a whole still contains more content.
|
||||
*
|
||||
* Returns `SHIFT_JIS_ENCODING` if the detector guessed
|
||||
* Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
|
||||
* guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
|
||||
* detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
* `aAllow2022` when instantiating the detector). Returns `nullptr` if the
|
||||
* detector is undecided. If `nullptr` is returned even when passing `true`
|
||||
* as `aLast`, falling back to Shift_JIS is the best guess for Web
|
||||
* purposes.
|
||||
*
|
||||
* Do not call again after the method has returned non-`nullptr` or after
|
||||
* the method has been called with `true` as `aLast`. (Asserts if the
|
||||
* previous sentence isn't adhered to.)
|
||||
*/
|
||||
inline const mozilla::Encoding* Feed(Span<const uint8_t> aBuffer,
|
||||
bool aLast) {
|
||||
return shift_or_euc_detector_feed(this, aBuffer.Elements(),
|
||||
aBuffer.Length(), aLast);
|
||||
}
|
||||
|
||||
private:
|
||||
// Default construction and copying are deleted; instances are obtained
// from Create().
JapaneseDetector() = delete;
|
||||
JapaneseDetector(const JapaneseDetector&) = delete;
|
||||
JapaneseDetector& operator=(const JapaneseDetector&) = delete;
|
||||
};
|
||||
|
||||
}; // namespace mozilla
|
||||
|
||||
#endif // mozilla_JapaneseDetector_h
|
|
@ -26,13 +26,11 @@ DIRS += [
|
|||
|
||||
EXPORTS.mozilla += [
|
||||
'Encoding.h',
|
||||
'JapaneseDetector.h',
|
||||
]
|
||||
|
||||
EXPORTS += [
|
||||
'../third_party/rust/encoding_c/include/encoding_rs.h',
|
||||
'../third_party/rust/encoding_c/include/encoding_rs_statics.h',
|
||||
'../third_party/rust/shift_or_euc_c/include/shift_or_euc.h',
|
||||
]
|
||||
|
||||
with Files("**"):
|
||||
|
|
|
@ -3116,15 +3116,6 @@ VARCACHE_PREF(
|
|||
RelaxedAtomicBool, false
|
||||
)
|
||||
|
||||
// Whether ISO-2022-JP is a permitted content-based encoding detection
|
||||
// outcome.
|
||||
// Boolean pref, true by default. nsHtml5StreamParser passes its value to
// JapaneseDetector::Create() as the aAllow2022 argument.
VARCACHE_PREF(
|
||||
Live,
|
||||
"intl.charset.detector.iso2022jp.allowed",
|
||||
intl_charset_detector_iso2022jp_allowed,
|
||||
bool, true
|
||||
)
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
// Graphic Layers prefs
|
||||
//---------------------------------------------------------------------------
|
||||
|
|
|
@ -95,6 +95,8 @@ FINAL_LIBRARY = 'xul'
|
|||
|
||||
LOCAL_INCLUDES += [
|
||||
'/dom/base',
|
||||
'/extensions/universalchardet/src/base',
|
||||
'/extensions/universalchardet/src/xpcom',
|
||||
'/intl/chardet',
|
||||
]
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "nsIThreadRetargetableRequest.h"
|
||||
#include "nsPrintfCString.h"
|
||||
#include "nsNetUtil.h"
|
||||
#include "nsUdetXPCOMWrapper.h"
|
||||
#include "nsXULAppAPI.h"
|
||||
#include "mozilla/SchedulerGroup.h"
|
||||
#include "nsJSEnvironment.h"
|
||||
|
@ -155,7 +156,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
|
|||
mBomState(eBomState::BOM_SNIFFING_NOT_STARTED),
|
||||
mCharsetSource(kCharsetUninitialized),
|
||||
mEncoding(WINDOWS_1252_ENCODING),
|
||||
mFeedChardet(true),
|
||||
mFeedChardetIfEncoding(nullptr),
|
||||
mReparseForbidden(false),
|
||||
mLastBuffer(nullptr), // Will be filled when starting
|
||||
mExecutor(aExecutor),
|
||||
|
@ -180,8 +181,6 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
|
|||
mEventTarget(nsHtml5Module::GetStreamParserThread()->SerialEventTarget()),
|
||||
mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)),
|
||||
mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)),
|
||||
mJapaneseDetector(mozilla::JapaneseDetector::Create(
|
||||
StaticPrefs::intl_charset_detector_iso2022jp_allowed())),
|
||||
mInitialEncodingWasFromParentFrame(false),
|
||||
mHasHadErrors(false),
|
||||
mDecodingLocalFileAsUTF8(false),
|
||||
|
@ -211,11 +210,16 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
|
|||
nsAutoCString detectorName;
|
||||
Preferences::GetLocalizedCString("intl.charset.detector", detectorName);
|
||||
if (!detectorName.IsEmpty()) {
|
||||
// We recognize one of the two magic strings for Russian and Ukrainian.
|
||||
// We recognize one of the three magic strings for the following languages.
|
||||
if (detectorName.EqualsLiteral("ruprob")) {
|
||||
mChardet = new nsRUProbDetector();
|
||||
mFeedChardetIfEncoding = WINDOWS_1251_ENCODING;
|
||||
} else if (detectorName.EqualsLiteral("ukprob")) {
|
||||
mChardet = new nsUKProbDetector();
|
||||
mFeedChardetIfEncoding = WINDOWS_1251_ENCODING;
|
||||
} else if (detectorName.EqualsLiteral("ja_parallel_state_machine")) {
|
||||
mChardet = new nsJAPSMDetector();
|
||||
mFeedChardetIfEncoding = SHIFT_JIS_ENCODING;
|
||||
}
|
||||
if (mChardet) {
|
||||
(void)mChardet->Init(this);
|
||||
|
@ -259,7 +263,7 @@ NS_IMETHODIMP
|
|||
nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
if (aConf == eBestAnswer || aConf == eSureAnswer) {
|
||||
mFeedChardet = false; // just in case
|
||||
mFeedChardetIfEncoding = nullptr; // just in case
|
||||
auto encoding =
|
||||
Encoding::ForLabelNoReplacement(nsDependentCString(aCharset));
|
||||
if (!encoding) {
|
||||
|
@ -267,8 +271,8 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
|
|||
}
|
||||
if (HasDecoder()) {
|
||||
if (mEncoding == encoding) {
|
||||
MOZ_ASSERT(mCharsetSource < kCharsetFromAutoDetection,
|
||||
"Why are we running chardet at all?");
|
||||
NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
|
||||
"Why are we running chardet at all?");
|
||||
mCharsetSource = kCharsetFromAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else {
|
||||
|
@ -290,62 +294,6 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
// Passes aBuffer to the Japanese detector and, once it produces a guess,
// either confirms the current encoding, requests a charset switch, or adopts
// the guess before a decoder has been set up.
void nsHtml5StreamParser::FeedJapaneseDetector(Span<const uint8_t> aBuffer,
|
||||
bool aLast) {
|
||||
const Encoding* guess = mJapaneseDetector->Feed(aBuffer, aLast);
|
||||
// Still undecided: leave all state untouched.
if (!guess) {
|
||||
return;
|
||||
}
|
||||
// The detector has answered; stop feeding it.
mFeedChardet = false;
|
||||
// While a local file is being decoded as UTF-8, only an ISO-2022-JP guess
// is acted upon.
if (mDecodingLocalFileAsUTF8 && guess != ISO_2022_JP_ENCODING) {
|
||||
return;
|
||||
}
|
||||
const bool forced = mCharsetSource == kCharsetFromParentForced ||
|
||||
mCharsetSource == kCharsetFromUserForced;
|
||||
int32_t source = forced ? kCharsetFromUserForcedAutoDetection
|
||||
: kCharsetFromAutoDetection;
|
||||
if (guess == mEncoding) {
|
||||
MOZ_ASSERT(mCharsetSource < source, "Why are we running chardet at all?");
|
||||
} else if (HasDecoder()) {
|
||||
// A decoder is already committed, so ask the docshell for a reload
// instead of changing the encoding in place.
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(guess), source, 0);
|
||||
FlushTreeOpsAndDisarmTimer();
|
||||
Interrupt();
|
||||
return;
|
||||
} else {
|
||||
// Answer arrived from the sniffing buffer; the sniffing code sets up the
// decoder afterwards.
mEncoding = WrapNotNull(guess);
|
||||
}
|
||||
// Shared tail for "guess confirmed" and "guess adopted".
mCharsetSource = source;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
}
|
||||
|
||||
// Routes aBuffer to whichever detector applies to the current encoding:
// the Japanese detector for Japanese legacy encodings, mChardet for
// windows-1251, and none otherwise (which turns detector feeding off).
void nsHtml5StreamParser::FeedDetector(Span<const uint8_t> aBuffer,
|
||||
bool aLast) {
|
||||
if (mEncoding->IsJapaneseLegacy()) {
|
||||
FeedJapaneseDetector(aBuffer, aLast);
|
||||
return;
|
||||
}
|
||||
const bool useChardet = mEncoding == WINDOWS_1251_ENCODING && mChardet &&
|
||||
!mDecodingLocalFileAsUTF8;
|
||||
if (!useChardet) {
|
||||
mFeedChardet = false;
|
||||
return;
|
||||
}
|
||||
// An empty buffer is not handed to DoIt().
if (!aBuffer.IsEmpty()) {
|
||||
bool stopFeeding = false;
|
||||
mozilla::Unused << mChardet->DoIt((const char*)aBuffer.Elements(),
|
||||
aBuffer.Length(), &stopFeeding);
|
||||
if (stopFeeding) {
|
||||
mFeedChardet = false;
|
||||
}
|
||||
}
|
||||
if (aLast) {
|
||||
mozilla::Unused << mChardet->Done();
|
||||
}
|
||||
}
|
||||
|
||||
void nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) {
|
||||
if (recordreplay::IsRecordingOrReplaying()) {
|
||||
nsAutoCString spec;
|
||||
|
@ -387,11 +335,6 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
|||
MOZ_ASSERT(mEncoding != UTF_8_ENCODING);
|
||||
mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
|
||||
} else {
|
||||
if (mCharsetSource >= kCharsetFromAutoDetection &&
|
||||
!(mCharsetSource == kCharsetFromUserForced ||
|
||||
mCharsetSource == kCharsetFromParentForced)) {
|
||||
mFeedChardet = false;
|
||||
}
|
||||
mDecodingLocalFileAsUTF8 = false;
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
|
||||
}
|
||||
|
@ -411,7 +354,7 @@ nsresult nsHtml5StreamParser::SetupDecodingFromBom(
|
|||
mDecodingLocalFileAsUTF8 = false;
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling();
|
||||
mCharsetSource = kCharsetFromByteOrderMark;
|
||||
mFeedChardet = false;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mSniffingBuffer = nullptr;
|
||||
mMetaScanner = nullptr;
|
||||
|
@ -469,7 +412,7 @@ void nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(
|
|||
}
|
||||
mCharsetSource = kCharsetFromIrreversibleAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mFeedChardet = false;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0);
|
||||
}
|
||||
|
||||
|
@ -536,38 +479,12 @@ static void HandleProcessingInstruction(void* aUserData,
|
|||
XML_StopParser(ud->mExpat, false);
|
||||
}
|
||||
|
||||
// Feeds the detector with the buffered sniffing bytes followed by the
// current segment, then signals end-of-stream when appropriate.
void nsHtml5StreamParser::FinalizeSniffingWithDetector(
|
||||
Span<const uint8_t> aFromSegment, uint32_t aCountToSniffingLimit,
|
||||
bool aEof) {
|
||||
if (mSniffingBuffer) {
|
||||
FeedDetector(MakeSpan(mSniffingBuffer.get(), mSniffingLength), false);
|
||||
}
|
||||
if (mFeedChardet && !aFromSegment.IsEmpty()) {
|
||||
// When reparsing is forbidden, only the bytes up to the sniffing limit
// are shown to the detector so that the outcome does not depend on where
// the buffer boundaries happened to fall. When reparsing is allowed the
// boundaries can still influence whether the page loads once or twice.
Span<const uint8_t> toFeed = aFromSegment;
|
||||
if (mReparseForbidden) {
|
||||
toFeed = aFromSegment.To(aCountToSniffingLimit);
|
||||
}
|
||||
FeedDetector(toFeed, false);
|
||||
}
|
||||
if (mFeedChardet && aEof) {
|
||||
// EOF must not be signalled if reparse is forbidden and part of the
// input was withheld from the detector above.
if (!mReparseForbidden || aCountToSniffingLimit == aFromSegment.Length()) {
|
||||
mFeedChardet = false;
|
||||
FeedDetector(Span<const uint8_t>(), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
||||
uint32_t aCountToSniffingLimit,
|
||||
bool aEof) {
|
||||
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
|
||||
MOZ_ASSERT(mCharsetSource < kCharsetFromUserForcedAutoDetection,
|
||||
"Should not finalize sniffing with strong decision already made.");
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
NS_ASSERTION(mCharsetSource < kCharsetFromParentForced,
|
||||
"Should not finalize sniffing when using forced charset.");
|
||||
if (mMode == VIEW_SOURCE_XML) {
|
||||
static const XML_Memory_Handling_Suite memsuite = {
|
||||
(void* (*)(size_t))moz_xmalloc, (void* (*)(void*, size_t))moz_xrealloc,
|
||||
|
@ -630,15 +547,50 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
|||
}
|
||||
|
||||
// meta scan failed.
|
||||
if (mCharsetSource < kCharsetFromMetaPrescan) {
|
||||
// Check for BOMless UTF-16 with Basic
|
||||
// Latin content for compat with IE. See bug 631751.
|
||||
SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit));
|
||||
if (mCharsetSource >= kCharsetFromHintPrevDoc) {
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
// Check for BOMless UTF-16 with Basic
|
||||
// Latin content for compat with IE. See bug 631751.
|
||||
SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit));
|
||||
// the charset may have been set now
|
||||
// maybe try chardet now;
|
||||
if (mFeedChardet) {
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, aEof);
|
||||
if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
|
||||
bool dontFeed;
|
||||
nsresult rv;
|
||||
if (mSniffingBuffer) {
|
||||
rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength,
|
||||
&dontFeed);
|
||||
if (dontFeed) {
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
}
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
if ((mFeedChardetIfEncoding == mEncoding) && !aFromSegment.IsEmpty()) {
|
||||
rv = mChardet->DoIt(
|
||||
(const char*)aFromSegment.Elements(),
|
||||
// Avoid buffer boundary-dependent behavior when
|
||||
// reparsing is forbidden. If reparse is forbidden,
|
||||
// act as if we only saw the first 1024 bytes.
|
||||
// When reparsing isn't forbidden, buffer boundaries
|
||||
// can have an effect on whether the page is loaded
|
||||
// once or twice. :-(
|
||||
mReparseForbidden ? aCountToSniffingLimit : aFromSegment.Length(),
|
||||
&dontFeed);
|
||||
if (dontFeed) {
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
}
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
if ((mFeedChardetIfEncoding == mEncoding) && (aEof || mReparseForbidden)) {
|
||||
// mReparseForbidden is checked so that we get to use the sniffing
|
||||
// buffer with the best guess so far if we aren't allowed to guess
|
||||
// better later.
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
rv = mChardet->Done();
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
// fall thru; callback may have changed charset
|
||||
}
|
||||
if (mCharsetSource == kCharsetUninitialized) {
|
||||
|
@ -648,7 +600,7 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
|||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else if (mMode == LOAD_AS_DATA && mCharsetSource == kCharsetFromFallback) {
|
||||
NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
|
||||
NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
|
||||
NS_ASSERTION(!mFeedChardetIfEncoding, "Should not feed chardet for XHR");
|
||||
NS_ASSERTION(mEncoding == UTF_8_ENCODING, "XHR should default to UTF-8");
|
||||
// Now mark charset source as non-weak to signal that we have a decision
|
||||
mCharsetSource = kCharsetFromDocTypeDefault;
|
||||
|
@ -735,6 +687,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
// earlier call to SetDocumentCharset(), since we didn't find a BOM and
|
||||
// overwrite mEncoding. (Note that if the user has overridden the charset,
|
||||
// we don't come here but check <meta> for XSS-dangerous charsets first.)
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
|
@ -766,16 +719,12 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
(encoding->IsAsciiCompatible() ||
|
||||
encoding == ISO_2022_JP_ENCODING)) {
|
||||
// Honor override
|
||||
if (mEncoding->IsJapaneseLegacy()) {
|
||||
mFeedChardet = true;
|
||||
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
|
||||
false);
|
||||
}
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment);
|
||||
}
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharsetSource = kCharsetFromMetaPrescan;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment);
|
||||
|
@ -784,10 +733,6 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
if (mCharsetSource == kCharsetFromParentForced ||
|
||||
mCharsetSource == kCharsetFromUserForced) {
|
||||
// meta not found, honor override
|
||||
if (mEncoding->IsJapaneseLegacy()) {
|
||||
mFeedChardet = true;
|
||||
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit, false);
|
||||
}
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
return FinalizeSniffing(aFromSegment, countToSniffingLimit, false);
|
||||
|
@ -816,6 +761,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
}
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharsetSource = kCharsetFromMetaPrescan;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
|
@ -895,12 +841,6 @@ void nsHtml5StreamParser::ReDecodeLocalFile() {
|
|||
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
|
||||
mHasHadErrors = false;
|
||||
|
||||
// We need the detector to start with fresh state.
|
||||
// Turn off ISO-2022-JP detection, because if this doc was
|
||||
// ISO-2022-JP, it would have already been detected.
|
||||
mJapaneseDetector = mozilla::JapaneseDetector::Create(false);
|
||||
mFeedChardet = true;
|
||||
|
||||
// Throw away previous decoded data
|
||||
mLastBuffer = mFirstBuffer;
|
||||
mLastBuffer->next = nullptr;
|
||||
|
@ -916,7 +856,7 @@ void nsHtml5StreamParser::ReDecodeLocalFile() {
|
|||
void nsHtml5StreamParser::CommitLocalFileToUTF8() {
|
||||
MOZ_ASSERT(mDecodingLocalFileAsUTF8);
|
||||
mDecodingLocalFileAsUTF8 = false;
|
||||
mFeedChardet = false;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mEncoding = UTF_8_ENCODING;
|
||||
mCharsetSource = kCharsetFromFileURLGuess;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
|
@ -1060,7 +1000,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
|
|||
// This is the old Gecko behavior but the HTML5 spec disagrees.
|
||||
// Don't reparse on POST.
|
||||
mReparseForbidden = true;
|
||||
mFeedChardet = false; // can't restart anyway
|
||||
mFeedChardetIfEncoding = nullptr; // can't restart anyway
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1091,10 +1031,8 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
|
|||
mInitialEncodingWasFromParentFrame = true;
|
||||
}
|
||||
|
||||
if (mCharsetSource >= kCharsetFromAutoDetection &&
|
||||
!(mCharsetSource == kCharsetFromParentForced ||
|
||||
mCharsetSource == kCharsetFromUserForced)) {
|
||||
mFeedChardet = false;
|
||||
if (mCharsetSource >= kCharsetFromAutoDetection) {
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
}
|
||||
|
||||
if (mCharsetSource < kCharsetFromUtf8OnlyMime) {
|
||||
|
@ -1103,11 +1041,11 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
// We are loading JSON/WebVTT/etc. into a browsing context.
|
||||
// There's no need to remove the BOM manually here, because
|
||||
// the UTF-8 decoder removes it.
|
||||
// We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
|
||||
// a browsing context. In the latter case, there's no need to remove the
|
||||
// BOM manually here, because the UTF-8 decoder removes it.
|
||||
mReparseForbidden = true;
|
||||
mFeedChardet = false;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
|
||||
// Instantiate the converter here to avoid BOM sniffing.
|
||||
mDecodingLocalFileAsUTF8 = false;
|
||||
|
@ -1147,9 +1085,8 @@ void nsHtml5StreamParser::DoStopRequest() {
|
|||
return;
|
||||
}
|
||||
}
|
||||
if (mFeedChardet) {
|
||||
mFeedChardet = false;
|
||||
FeedDetector(Span<uint8_t>(), true);
|
||||
if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
|
||||
mChardet->Done();
|
||||
}
|
||||
|
||||
MOZ_ASSERT(mUnicodeDecoder,
|
||||
|
@ -1309,8 +1246,13 @@ void nsHtml5StreamParser::DoDataAvailable(Span<const uint8_t> aBuffer) {
|
|||
|
||||
nsresult rv;
|
||||
if (HasDecoder()) {
|
||||
if (mFeedChardet) {
|
||||
FeedDetector(aBuffer, false);
|
||||
if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
|
||||
bool dontFeed;
|
||||
mChardet->DoIt((const char*)aBuffer.Elements(), aBuffer.Length(),
|
||||
&dontFeed);
|
||||
if (dontFeed) {
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
}
|
||||
}
|
||||
rv = WriteStreamBytes(aBuffer);
|
||||
} else {
|
||||
|
@ -1469,7 +1411,7 @@ const Encoding* nsHtml5StreamParser::PreferredForInternalEncodingDecl(
|
|||
}
|
||||
}
|
||||
mCharsetSource = kCharsetFromMetaTag; // become confident
|
||||
mFeedChardet = false; // don't feed chardet when confident
|
||||
mFeedChardetIfEncoding = nullptr; // don't feed chardet when confident
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -1508,7 +1450,7 @@ bool nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding) {
|
|||
|
||||
// Avoid having the chardet ask for another restart after this restart
|
||||
// request.
|
||||
mFeedChardet = false;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding), kCharsetFromMetaTag,
|
||||
mTokenizer->getLineNumber());
|
||||
FlushTreeOpsAndDisarmTimer();
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
#include "nsICharsetDetectionObserver.h"
|
||||
#include "nsHtml5MetaScanner.h"
|
||||
#include "mozilla/Encoding.h"
|
||||
#include "mozilla/JapaneseDetector.h"
|
||||
#include "nsHtml5TreeOpExecutor.h"
|
||||
#include "nsHtml5OwningUTF16Buffer.h"
|
||||
#include "nsIInputStream.h"
|
||||
|
@ -149,16 +148,6 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
|
|||
|
||||
// Not from an external interface
|
||||
|
||||
/**
|
||||
* Pass a buffer to the JapaneseDetector.
|
||||
*/
|
||||
void FeedJapaneseDetector(mozilla::Span<const uint8_t> aBuffer, bool aLast);
|
||||
|
||||
/**
|
||||
* Pass a buffer to the Japanese or Cyrillic detector as appropriate.
|
||||
*/
|
||||
void FeedDetector(mozilla::Span<const uint8_t> aBuffer, bool aLast);
|
||||
|
||||
/**
|
||||
* Call this method once you've created a parser, and want to instruct it
|
||||
* about what charset to load
|
||||
|
@ -293,12 +282,6 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
|
|||
*/
|
||||
void SniffBOMlessUTF16BasicLatin(mozilla::Span<const uint8_t> aFromSegment);
|
||||
|
||||
/**
|
||||
* Write the start of the stream to detector.
|
||||
*/
|
||||
void FinalizeSniffingWithDetector(mozilla::Span<const uint8_t> aFromSegment,
|
||||
uint32_t aCountToSniffingLimit, bool aEof);
|
||||
|
||||
/**
|
||||
* <meta charset> scan failed. Try chardet if applicable. After this, the
|
||||
* parser will have some encoding, even if only a last-resort fallback.
|
||||
|
@ -428,9 +411,9 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
|
|||
NotNull<const Encoding*> mEncoding;
|
||||
|
||||
/**
|
||||
* Whether the Cyrillic or Japanese detector should still be fed.
|
||||
* The character encoding that is the base expectation for detection.
|
||||
*/
|
||||
bool mFeedChardet;
|
||||
const Encoding* mFeedChardetIfEncoding;
|
||||
|
||||
/**
|
||||
* Whether reparse is forbidden
|
||||
|
@ -546,15 +529,10 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
|
|||
nsCOMPtr<nsIRunnable> mLoadFlusher;
|
||||
|
||||
/**
|
||||
* The Cyrillic detector if enabled.
|
||||
* The chardet instance if chardet is enabled.
|
||||
*/
|
||||
nsCOMPtr<nsICharsetDetector> mChardet;
|
||||
|
||||
/**
|
||||
* The Japanese detector.
|
||||
*/
|
||||
mozilla::UniquePtr<mozilla::JapaneseDetector> mJapaneseDetector;
|
||||
|
||||
/**
|
||||
* Whether the initial charset source was kCharsetFromParentFrame
|
||||
*/
|
||||
|
|
|
@ -14,14 +14,14 @@
|
|||
#define kCharsetFromCache 5
|
||||
#define kCharsetFromParentFrame 6
|
||||
#define kCharsetFromAutoDetection 7
|
||||
#define kCharsetFromMetaPrescan 8 // this one and smaller: HTML5 Tentative
|
||||
#define kCharsetFromMetaTag 9 // this one and greater: HTML5 Confident
|
||||
#define kCharsetFromIrreversibleAutoDetection 10
|
||||
#define kCharsetFromChannel 11
|
||||
#define kCharsetFromOtherComponent 12
|
||||
#define kCharsetFromParentForced 13 // propagates to child frames
|
||||
#define kCharsetFromUserForced 14 // propagates to child frames
|
||||
#define kCharsetFromUserForcedAutoDetection 15
|
||||
#define kCharsetFromHintPrevDoc 8
|
||||
#define kCharsetFromMetaPrescan 9 // this one and smaller: HTML5 Tentative
|
||||
#define kCharsetFromMetaTag 10 // this one and greater: HTML5 Confident
|
||||
#define kCharsetFromIrreversibleAutoDetection 11
|
||||
#define kCharsetFromChannel 12
|
||||
#define kCharsetFromOtherComponent 13
|
||||
#define kCharsetFromParentForced 14 // propagates to child frames
|
||||
#define kCharsetFromUserForced 15 // propagates to child frames
|
||||
#define kCharsetFromByteOrderMark 16
|
||||
#define kCharsetFromUtf8OnlyMime 17 // For JSON, WebVTT and such
|
||||
#define kCharsetFromBuiltIn 18 // resource: URLs
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -12,7 +12,7 @@
|
|||
|
||||
[package]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.17"
|
||||
version = "0.8.16"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "A Gecko-oriented implementation of the Encoding Standard"
|
||||
homepage = "https://docs.rs/encoding_rs/"
|
||||
|
@ -35,7 +35,7 @@ optional = true
|
|||
version = "1.0"
|
||||
optional = true
|
||||
[dev-dependencies.bincode]
|
||||
version = "1.0"
|
||||
version = "0.8"
|
||||
|
||||
[dev-dependencies.serde_derive]
|
||||
version = "1.0"
|
||||
|
|
|
@ -404,10 +404,6 @@ To regenerate the generated code:
|
|||
|
||||
## Release Notes
|
||||
|
||||
### 0.8.17
|
||||
|
||||
* Update `bincode` (dev dependency) version requirement to 1.0.
|
||||
|
||||
### 0.8.16
|
||||
|
||||
* Switch from the `simd` crate to `packed_simd`.
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
feature = "cargo-clippy",
|
||||
allow(doc_markdown, inline_always, new_ret_no_self)
|
||||
)]
|
||||
#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.17")]
|
||||
#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.16")]
|
||||
|
||||
//! encoding_rs is a Gecko-oriented Free Software / Open Source implementation
|
||||
//! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust.
|
||||
|
@ -5635,7 +5635,7 @@ mod tests {
|
|||
let deserialized: Demo = serde_json::from_str(&serialized).unwrap();
|
||||
assert_eq!(deserialized, demo);
|
||||
|
||||
let bincoded = bincode::serialize(&demo).unwrap();
|
||||
let bincoded = bincode::serialize(&demo, bincode::Infinite).unwrap();
|
||||
let debincoded: Demo = bincode::deserialize(&bincoded[..]).unwrap();
|
||||
assert_eq!(debincoded, demo);
|
||||
}
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"f9f41b76ecbe257a312ab09ed1208189b8dc9952d12d17a216fe2846d1d471c8","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"b7148745a7ef59788e76fbe638d4b41c54dcaa1313a809f4630a020645f892a8","examples/detect.rs":"eb7239ccc802290ef24331db600ca1226198801dd86df86876b4b738ef4b8470","src/lib.rs":"f2a83db125d553af5c6fabae0487ef211aad62f2d93c4418dc510cbd425d472a"},"package":"f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"}
|
|
@ -1,38 +0,0 @@
|
|||
If you send a pull request / patch, please observe the following.
|
||||
|
||||
## Licensing
|
||||
|
||||
Since this crate is dual-licensed,
|
||||
[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
|
||||
is considered to apply in the sense of Contributions being automatically
|
||||
under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
|
||||
That is, by the act of offering a Contribution, you place your Contribution
|
||||
under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
|
||||
file. Please do not contribute if you aren't willing or allowed to license your
|
||||
contributions in this manner.
|
||||
|
||||
You are encouraged to dedicate test code that you contribute to the Public
|
||||
Domain using the CC0 dedication. If you contribute test code that is not
|
||||
dedicated to the Public Domain, please be sure not to put it in a part of
|
||||
source code that the comments designate as being dedicated to the Public
|
||||
Domain.
|
||||
|
||||
## Copyright Notices
|
||||
|
||||
If you require the addition of your copyright notice, it's up to you to edit in
|
||||
your notice as part of your Contribution. Not adding a copyright notice is
|
||||
taken as a waiver of copyright notice.
|
||||
|
||||
## Compatibility with Stable Rust
|
||||
|
||||
Please ensure that your Contribution compiles with the latest stable-channel
|
||||
rustc.
|
||||
|
||||
## rustfmt
|
||||
|
||||
The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
|
||||
use that version or avoid using `rustfmt` (so as not to reformat all the code).
|
||||
|
||||
## Unit tests
|
||||
|
||||
Please ensure that `cargo test` succeeds.
|
|
@ -1,9 +0,0 @@
|
|||
shift_or_euc is copyright 2018 Mozilla Foundation.
|
||||
|
||||
Licensed under the Apache License, Version 2.0
|
||||
<LICENSE-APACHE or
|
||||
https://www.apache.org/licenses/LICENSE-2.0> or the MIT
|
||||
license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
|
||||
at your option. All files in the project carrying such
|
||||
notice may not be copied, modified, or distributed except
|
||||
according to those terms.
|
|
@ -1,30 +0,0 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "shift_or_euc"
|
||||
version = "0.1.0"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "Detects among the Japanese legacy encodings"
|
||||
homepage = "https://docs.rs/shift_or_euc/"
|
||||
documentation = "https://docs.rs/shift_or_euc/"
|
||||
readme = "README.md"
|
||||
keywords = ["encoding", "web", "charset"]
|
||||
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
|
||||
license = "MIT/Apache-2.0"
|
||||
repository = "https://github.com/hsivonen/shift_or_euc"
|
||||
[dependencies.encoding_rs]
|
||||
version = "0.8.17"
|
||||
|
||||
[dependencies.memchr]
|
||||
version = "2.2.0"
|
|
@ -1,202 +0,0 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -1,25 +0,0 @@
|
|||
Copyright (c) 2018 Mozilla Foundation
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
|
@ -1,73 +0,0 @@
|
|||
# shift_or_euc
|
||||
|
||||
[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT)
|
||||
|
||||
A Japanese legacy encoding detector for detecting between Shift_JIS, EUC-JP,
|
||||
and, optionally, ISO-2022-JP _given_ the assumption that the encoding is one
|
||||
of those.
|
||||
|
||||
This detector is generally more accurate (but see below about the failure
|
||||
mode on half-width katakana) and decides much sooner than machine
|
||||
learning-based detectors. To decide EUC-JP, machine learning-based detectors
|
||||
try to gain confidence that the input looks like EUC-JP. To decide EUC-JP,
|
||||
this detector instead looks for two simple rule-based signs of the input not
|
||||
being Shift_JIS.
|
||||
|
||||
As a consequence of not containing machine learning tables, the binary size
|
||||
footprint that this crate adds on top of
|
||||
[`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny.
|
||||
|
||||
## Documentation
|
||||
|
||||
[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc)
|
||||
|
||||
## Licensing
|
||||
|
||||
See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
|
||||
|
||||
## Sample Program Usage
|
||||
|
||||
1. [Install Rust](https://rustup.rs/)
|
||||
2. `git clone https://github.com/hsivonen/shift_or_euc`
|
||||
3. `cd shift_or_euc`
|
||||
4. `cargo run --example detect PATH_TO_FILE`
|
||||
|
||||
The program prints one of:
|
||||
|
||||
* Shift_JIS
|
||||
* EUC-JP
|
||||
* ISO-2022-JP
|
||||
* Undecided
|
||||
|
||||
## Principle of Operation
|
||||
|
||||
The detector is based on two observations:
|
||||
|
||||
1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
|
||||
EUC-JP, so encountering such an escape sequence (before non-ASCII has been
|
||||
encountered) can be taken as indication of ISO-2022-JP.
|
||||
2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
|
||||
decoded as EUC-JP, or vice versa, the result is either an error or half-width
|
||||
katakana, and it's very uncommon for Japanese HTML to have half-width katakana
|
||||
character before a normal kana or common kanji character. Therefore, if
|
||||
decoding as Shift_JIS results in error or half-width katakana, the detector
|
||||
decides that the content is EUC-JP, and vice versa.
|
||||
|
||||
## Failure Modes
|
||||
|
||||
The detector gives the wrong answer if the text has a half-width katakana
|
||||
character before normal kana or common kanji. Some uncommon kanji are
|
||||
undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
|
||||
|
||||
The half-width katakana issue is mainly relevant for old 8-bit JIS X 0201-only
|
||||
text files that would decode correctly as Shift_JIS but that the detector
|
||||
detects as EUC-JP.
|
||||
|
||||
The undecidable kanji issue does not realistically show up when a full
|
||||
document is fed to the detector, because, realistically, in a full document,
|
||||
there is at least one kana or common kanji. It can occur, though, if the
|
||||
detector is only run on a prefix of a document and the prefix only contains
|
||||
the title of the document. It is possible for document title to consist
|
||||
entirely of undecidable kanji. (Indeed, Japanese Wikipedia has articles with
|
||||
such titles.) If the detector is undecided, falling back to Shift_JIS is
|
||||
typically the better Web-oriented guess.
|
|
@ -1,56 +0,0 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
use shift_or_euc::Detector;
|
||||
|
||||
// Example program: takes exactly one path argument, reads that file in
// 4096-byte chunks, feeds each chunk to a `Detector`, and prints the name of
// the first encoding the detector reports. Prints "Undecided" if the end of
// the file is reached without a decision.
//
// Exit codes on error: -1 program name missing, -2 no path argument,
// -3 too many arguments, -4 file could not be opened, -5 read error.
fn main() {
|
||||
let mut args = std::env::args_os();
|
||||
// The first item is consumed and discarded; it is expected to be the program name.
if args.next().is_none() {
|
||||
eprintln!("Error: Program name missing from arguments.");
|
||||
std::process::exit(-1);
|
||||
}
|
||||
if let Some(path) = args.next() {
|
||||
// Anything after the path is rejected.
if args.next().is_some() {
|
||||
eprintln!("Error: Too many arguments.");
|
||||
std::process::exit(-3);
|
||||
}
|
||||
if let Ok(mut file) = File::open(path) {
|
||||
let mut buffer = [0u8; 4096];
|
||||
// NOTE(review): the `true` argument presumably enables ISO-2022-JP detection — confirm against `Detector::new`.
let mut detector = Detector::new(true);
|
||||
loop {
|
||||
if let Ok(num_read) = file.read(&mut buffer[..]) {
|
||||
// A zero-length read is treated as end of file: feed an empty slice with the
// second argument set to `true` (presumably the end-of-input flag — confirm).
let opt_enc = if num_read == 0 {
|
||||
detector.feed(b"", true)
|
||||
} else {
|
||||
detector.feed(&buffer[..num_read], false)
|
||||
};
|
||||
// Stop at the first decision; otherwise keep reading until end of file.
if let Some(encoding) = opt_enc {
|
||||
println!("{}", encoding.name());
|
||||
return;
|
||||
} else if num_read == 0 {
|
||||
println!("Undecided");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
eprintln!("Error: Error reading file.");
|
||||
std::process::exit(-5);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("Error: Could not open file.");
|
||||
std::process::exit(-4);
|
||||
}
|
||||
} else {
|
||||
eprintln!("Error: One path argument needed.");
|
||||
std::process::exit(-2);
|
||||
}
|
||||
}
|
|
@ -1,278 +0,0 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![doc(html_root_url = "https://docs.rs/shift_or_euc/0.1.0")]
|
||||
|
||||
//! A Japanese legacy encoding detector for detecting between Shift_JIS,
|
||||
//! EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the
|
||||
//! encoding is one of those.
|
||||
//!
|
||||
//! This detector is generally more accurate (but see below about the failure
|
||||
//! mode on half-width katakana) and decides much sooner than machine
|
||||
//! learning-based detectors. To decide EUC-JP, machine learning-based
|
||||
//! detectors try to gain confidence that the input looks like EUC-JP. To
|
||||
//! decide EUC-JP, this detector instead looks for two simple rule-based
|
||||
//! signs of the input not being Shift_JIS.
|
||||
//!
|
||||
//! As a consequence of not containing machine learning tables, the binary
|
||||
//! size footprint that this crate adds on top of
|
||||
//! [`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny.
|
||||
//!
|
||||
//! # Licensing
|
||||
//!
|
||||
//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
|
||||
//!
|
||||
//! # Principle of Operation
|
||||
//!
|
||||
//! The detector is based on two observations:
|
||||
//!
|
||||
//! 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
|
||||
//! EUC-JP, so encountering such an escape sequence (before non-ASCII has been
|
||||
//! encountered) can be taken as indication of ISO-2022-JP.
|
||||
//! 2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
|
||||
//! decoded as EUC-JP, or vice versa, the result is either an error or
|
||||
//! half-width katakana, and it's very uncommon for Japanese HTML to have
|
||||
//! half-width katakana character before a normal kana or common kanji
|
||||
//! character. Therefore, if decoding as Shift_JIS results in error or
|
||||
//! half-width katakana, the detector decides that the content is EUC-JP, and
|
||||
//! vice versa.
|
||||
//!
|
||||
//! # Failure Modes
|
||||
//!
|
||||
//! The detector gives the wrong answer if the text has a half-width katakana
|
||||
//! character before normal kana or common kanji. Some uncommon kanji are
|
||||
//! undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
|
||||
//!
|
||||
//! The half-width katakana issue is mainly relevant for old 8-bit JIS X
|
||||
//! 0201-only text files that would decode correctly as Shift_JIS but that the
|
||||
//! detector detects as EUC-JP.
|
||||
//!
|
||||
//! The undecidable kanji issue does not realistically show up when a full
|
||||
//! document is fed to the detector, because, realistically, in a full
|
||||
//! document, there is at least one kana or common kanji. It can occur,
|
||||
//! though, if the detector is only run on a prefix of a document and the
|
||||
//! prefix only contains the title of the document. It is possible for a
|
||||
//! document title to consist entirely of undecidable kanji. (Indeed,
|
||||
//! Japanese Wikipedia has articles with such titles.) If the detector is
|
||||
//! undecided, falling back to Shift_JIS is typically the better Web-oriented
|
||||
//! guess.
|
||||
|
||||
use encoding_rs::Decoder;
|
||||
use encoding_rs::DecoderResult;
|
||||
use encoding_rs::Encoding;
|
||||
use encoding_rs::EUC_JP;
|
||||
use encoding_rs::ISO_2022_JP;
|
||||
use encoding_rs::SHIFT_JIS;
|
||||
|
||||
/// Returns the index of the first non-ASCII byte or the first
|
||||
/// 0x1B, whichever comes first, or the length of the buffer
|
||||
/// if neither is found.
|
||||
fn find_non_ascii_or_escape(buffer: &[u8]) -> usize {
|
||||
let ascii_up_to = Encoding::ascii_valid_up_to(buffer);
|
||||
if let Some(escape) = memchr::memchr(0x1B, &buffer[..ascii_up_to]) {
|
||||
escape
|
||||
} else {
|
||||
ascii_up_to
|
||||
}
|
||||
}
|
||||
|
||||
/// Feed decoder with one byte (if `last` is `false`) or EOF (if `last` is
|
||||
/// `true`). `byte` is ignored if `last` is `true`.
|
||||
/// Returns `true` if there was no rejection or `false` upon rejecting the
|
||||
/// encoding hypothesis represented by this decoder.
|
||||
#[inline(always)]
|
||||
fn feed_decoder(decoder: &mut Decoder, byte: u8, last: bool) -> bool {
|
||||
let mut output = [0u16; 1];
|
||||
let input = [byte];
|
||||
let (result, _read, written) = decoder.decode_to_utf16_without_replacement(
|
||||
if last { b"" } else { &input },
|
||||
&mut output,
|
||||
last,
|
||||
);
|
||||
match result {
|
||||
DecoderResult::InputEmpty => {
|
||||
if written == 1 {
|
||||
match output[0] {
|
||||
0xFF61...0xFF9F => {
|
||||
return false;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
DecoderResult::Malformed(_, _) => {
|
||||
return false;
|
||||
}
|
||||
DecoderResult::OutputFull => {
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// A detector for detecting the character encoding of input on the
|
||||
/// precondition that the encoding is a Japanese legacy encoding.
|
||||
pub struct Detector {
|
||||
shift_jis_decoder: Decoder,
|
||||
euc_jp_decoder: Decoder,
|
||||
second_byte_in_escape: u8,
|
||||
iso_2022_jp_disqualified: bool,
|
||||
escape_seen: bool,
|
||||
finished: bool,
|
||||
}
|
||||
|
||||
impl Detector {
|
||||
/// Instantiates the detector. If `allow_2022` is `true` the possible
|
||||
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
/// and undecided.
|
||||
pub fn new(allow_2022: bool) -> Self {
|
||||
Detector {
|
||||
shift_jis_decoder: SHIFT_JIS.new_decoder_without_bom_handling(),
|
||||
euc_jp_decoder: EUC_JP.new_decoder_without_bom_handling(),
|
||||
second_byte_in_escape: 0,
|
||||
iso_2022_jp_disqualified: !allow_2022,
|
||||
escape_seen: false,
|
||||
finished: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Feeds bytes to the detector. If `last` is `true` the end of the stream
|
||||
/// is considered to occur immediately after the end of `buffer`.
|
||||
/// Otherwise, the stream is expected to continue. `buffer` may be empty.
|
||||
///
|
||||
/// If you're running the detector only on a prefix of a complete
|
||||
/// document, _do not_ pass `last` as `true` after the prefix if the
|
||||
/// stream as a whole still contains more content.
|
||||
///
|
||||
/// Returns `Some(encoding_rs::SHIFT_JIS)` if the detector guessed
|
||||
/// Shift_JIS. Returns `Some(encoding_rs::EUC_JP)` if the detector
|
||||
/// guessed EUC-JP. Returns `Some(encoding_rs::ISO_2022_JP)` if the
|
||||
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
/// `allow_2022` when instantiating the detector). Returns `None` if the
|
||||
/// detector is undecided. If `None` is returned even when passing `true`
|
||||
/// as `last`, falling back to Shift_JIS is the best guess for Web
|
||||
/// purposes.
|
||||
///
|
||||
/// Do not call again after the method has returned `Some(_)` or after
|
||||
/// the method has been called with `true` as `last`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If called after the method has returned `Some(_)` or after the method
|
||||
/// has been called with `true` as `last`.
|
||||
pub fn feed(&mut self, buffer: &[u8], last: bool) -> Option<&'static Encoding> {
|
||||
assert!(
|
||||
!self.finished,
|
||||
"Tried to used a detector that has finished."
|
||||
);
|
||||
self.finished = true; // Will change back to false unless we return early
|
||||
let mut i = 0;
|
||||
if !self.iso_2022_jp_disqualified {
|
||||
if !self.escape_seen {
|
||||
i = find_non_ascii_or_escape(buffer);
|
||||
}
|
||||
while i < buffer.len() {
|
||||
let byte = buffer[i];
|
||||
if byte > 0x7F {
|
||||
self.iso_2022_jp_disqualified = true;
|
||||
break;
|
||||
}
|
||||
if !self.escape_seen && byte == 0x1B {
|
||||
self.escape_seen = true;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if self.escape_seen && self.second_byte_in_escape == 0 {
|
||||
self.second_byte_in_escape = byte;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
match (self.second_byte_in_escape, byte) {
|
||||
(0x28, 0x42) | (0x28, 0x4A) | (0x28, 0x49) | (0x24, 0x40) | (0x24, 0x42) => {
|
||||
return Some(ISO_2022_JP);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
if self.escape_seen {
|
||||
self.iso_2022_jp_disqualified = true;
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
for &byte in &buffer[i..] {
|
||||
if !feed_decoder(&mut self.euc_jp_decoder, byte, false) {
|
||||
return Some(SHIFT_JIS);
|
||||
}
|
||||
if !feed_decoder(&mut self.shift_jis_decoder, byte, false) {
|
||||
return Some(EUC_JP);
|
||||
}
|
||||
}
|
||||
if last {
|
||||
if !feed_decoder(&mut self.euc_jp_decoder, 0, true) {
|
||||
return Some(SHIFT_JIS);
|
||||
}
|
||||
if !feed_decoder(&mut self.shift_jis_decoder, 0, true) {
|
||||
return Some(EUC_JP);
|
||||
}
|
||||
return None;
|
||||
}
|
||||
self.finished = false;
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// Any copyright to the test code below this comment is dedicated to the
|
||||
// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a new ISO-2022-JP-enabled detector over `bytes`, treating them
    /// as the complete stream.
    fn detect_whole(bytes: &[u8]) -> Option<&'static Encoding> {
        Detector::new(true).feed(bytes, true)
    }

    #[test]
    fn test_iso_2022_jp() {
        assert_eq!(detect_whole(b"abc\x1B\x28\x42\xFF"), Some(ISO_2022_JP));
    }

    #[test]
    fn test_error_precedence() {
        assert_eq!(detect_whole(b"abc\xFF"), Some(SHIFT_JIS));
    }

    #[test]
    fn test_invalid_euc_jp() {
        assert_eq!(detect_whole(b"abc\x81\x40"), Some(SHIFT_JIS));
    }

    #[test]
    fn test_invalid_shift_jis() {
        assert_eq!(detect_whole(b"abc\xEB\xA8"), Some(EUC_JP));
    }

    #[test]
    fn test_invalid_shift_jis_before_invalid_euc_jp() {
        assert_eq!(detect_whole(b"abc\xEB\xA8\x81\x40"), Some(EUC_JP));
    }

    #[test]
    fn test_undecided() {
        // Pure ASCII never decides, and an undecided detector stays usable.
        let mut detector = Detector::new(true);
        for _ in 0..2 {
            assert_eq!(detector.feed(b"abc", false), None);
        }
    }
}
|
|
@ -1 +0,0 @@
|
|||
{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"342e5345f4fb433b89f397b07e4e7162376b30cbbc1d6f6ccb11523116e6ed6b","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"a323f1f4537bc7b3f9b3b216c8ac5041b83aa0321f5349a52627aade947c6272","include/shift_or_euc.h":"47c3b9832cb7eb8995aa37dcc2e76be7d4f5c7b3fa6b43135e579831ab449cd8","src/lib.rs":"cab1898dd6724e0a0324a1e44f6348c107f13916da8873dba69c70dbc95ba9cd"},"package":"c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"}
|
|
@ -1,38 +0,0 @@
|
|||
If you send a pull request / patch, please observe the following.
|
||||
|
||||
## Licensing
|
||||
|
||||
Since this crate is dual-licensed,
|
||||
[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
|
||||
is considered to apply in the sense of Contributions being automatically
|
||||
under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
|
||||
That is, by the act of offering a Contribution, you place your Contribution
|
||||
under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
|
||||
file. Please do not contribute if you aren't willing or allowed to license your
|
||||
contributions in this manner.
|
||||
|
||||
You are encouraged to dedicate test code that you contribute to the Public
|
||||
Domain using the CC0 dedication. If you contribute test code that is not
|
||||
dedicated to the Public Domain, please be sure not to put it in a part of
|
||||
source code that the comments designate as being dedicated to the Public
|
||||
Domain.
|
||||
|
||||
## Copyright Notices
|
||||
|
||||
If you require the addition of your copyright notice, it's up to you to edit in
|
||||
your notice as part of your Contribution. Not adding a copyright notice is
|
||||
taken as a waiver of copyright notice.
|
||||
|
||||
## Compatibility with Stable Rust
|
||||
|
||||
Please ensure that your Contribution compiles with the latest stable-channel
|
||||
rustc.
|
||||
|
||||
## rustfmt
|
||||
|
||||
The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
|
||||
use that version or avoid using `rustfmt` (so as not to reformat all the code).
|
||||
|
||||
## Unit tests
|
||||
|
||||
Please ensure that `cargo test` succeeds.
|
|
@ -1,9 +0,0 @@
|
|||
shift_or_euc is copyright 2018 Mozilla Foundation.
|
||||
|
||||
Licensed under the Apache License, Version 2.0
|
||||
<LICENSE-APACHE or
|
||||
https://www.apache.org/licenses/LICENSE-2.0> or the MIT
|
||||
license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
|
||||
at your option. All files in the project carrying such
|
||||
notice may not be copied, modified, or distributed except
|
||||
according to those terms.
|
|
@ -1,30 +0,0 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "shift_or_euc_c"
|
||||
version = "0.1.0"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "C API for shift_or_euc"
|
||||
homepage = "https://docs.rs/shift_or_euc_c/"
|
||||
documentation = "https://docs.rs/shift_or_euc_c/"
|
||||
readme = "README.md"
|
||||
keywords = ["encoding", "web", "charset"]
|
||||
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
|
||||
license = "MIT/Apache-2.0"
|
||||
repository = "https://github.com/hsivonen/shift_or_euc_c"
|
||||
[dependencies.encoding_rs]
|
||||
version = "0.8.17"
|
||||
|
||||
[dependencies.shift_or_euc]
|
||||
version = "0.1.0"
|
|
@ -1,202 +0,0 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -1,25 +0,0 @@
|
|||
Copyright (c) 2018 Mozilla Foundation
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
|
@ -1,13 +0,0 @@
|
|||
# shift_or_euc_c
|
||||
|
||||
[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT)
|
||||
|
||||
C API for [`shift_or_euc`](https://docs.rs/crate/shift_or_euc).
|
||||
|
||||
## Documentation
|
||||
|
||||
[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc_c)
|
||||
|
||||
## Licensing
|
||||
|
||||
See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT).
|
|
@ -1,88 +0,0 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#ifndef shift_or_euc_h
#define shift_or_euc_h

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stddef.h> /* size_t, used by shift_or_euc_detector_feed */
#include <stdint.h>
#include "encoding_rs.h"

/* Callers may predefine SHIFT_OR_EUC_DETECTOR to substitute their own type
 * name for the opaque detector type. Otherwise it defaults to `Detector`,
 * which is declared here as an opaque struct for C only. */
#ifndef SHIFT_OR_EUC_DETECTOR
#define SHIFT_OR_EUC_DETECTOR Detector
#ifndef __cplusplus
typedef struct Detector_ Detector;
#endif
#endif

/// Instantiates the detector. If `allow_2022` is `true` the possible
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
/// and undecided.
///
/// The instantiated detector must be freed after use using
/// `shift_or_euc_detector_free`.
SHIFT_OR_EUC_DETECTOR* shift_or_euc_detector_new(bool allow_2022);

/// Deallocates a detector obtained from `shift_or_euc_detector_new`.
void shift_or_euc_detector_free(SHIFT_OR_EUC_DETECTOR* detector);

/// Feeds bytes to the detector. If `last` is `true` the end of the stream
/// is considered to occur immediately after the end of `buffer`.
/// Otherwise, the stream is expected to continue. `buffer_len` may be zero.
/// `buffer` must not be `NULL` but may be non-dereferenceable when
/// `buffer_len` is zero.
///
/// If you're running the detector only on a prefix of a complete
/// document, _do not_ pass `last` as `true` after the prefix if the
/// stream as a whole still contains more content.
///
/// Returns `SHIFT_JIS_ENCODING` if the detector guessed
/// Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
/// guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
/// `allow_2022` when instantiating the detector). Returns `NULL` if the
/// detector is undecided. If `NULL` is returned even when passing `true`
/// as `last`, falling back to Shift_JIS is the best guess for Web
/// purposes.
///
/// Do not call again after the function has returned non-`NULL` or after
/// the function has been called with `true` as `last`.
///
/// # Panics
///
/// If called after the function has returned non-`NULL` or after the
/// function has been called with `true` as `last`.
///
/// # Undefined Behavior
///
/// UB ensues if
///
/// * `detector` does not point to a detector obtained from
///   `shift_or_euc_detector_new` but not yet freed with
///   `shift_or_euc_detector_free`.
/// * `buffer` is `NULL`.
/// * `buffer` and `buffer_len` don't designate a range of memory
///   valid for reading.
ENCODING_RS_ENCODING const* shift_or_euc_detector_feed(
    SHIFT_OR_EUC_DETECTOR* detector,
    uint8_t const* buffer,
    size_t buffer_len,
    bool last
);

#ifdef __cplusplus
}
#endif

#endif // shift_or_euc_h
|
|
@ -1,94 +0,0 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![doc(html_root_url = "https://docs.rs/shift_or_euc_c/0.1.0")]
|
||||
|
||||
//! C API for [`shift_or_euc`](https://docs.rs/shift_or_euc/)
|
||||
//!
|
||||
//! # Panics
|
||||
//!
|
||||
//! This crate is designed to be used only in a `panic=abort` scenario.
|
||||
//! Panic propagation across FFI is not handled!
|
||||
//!
|
||||
//! # Licensing
|
||||
//!
|
||||
//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT).
|
||||
|
||||
use encoding_rs::Encoding;
|
||||
use shift_or_euc::*;
|
||||
|
||||
/// Instantiates the detector. If `allow_2022` is `true` the possible
|
||||
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
/// and undecided.
|
||||
///
|
||||
/// The instantiated detector must be freed after use using
|
||||
/// `shift_or_euc_detector_free`.
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn shift_or_euc_detector_new(allow_2022: bool) -> *mut Detector {
|
||||
Box::into_raw(Box::new(Detector::new(allow_2022)))
|
||||
}
|
||||
|
||||
/// Deallocates a detector obtained from `shift_or_euc_detector_new`.
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn shift_or_euc_detector_free(detector: *mut Detector) {
|
||||
let _ = Box::from_raw(detector);
|
||||
}
|
||||
|
||||
/// Feeds bytes to the detector. If `last` is `true` the end of the stream
|
||||
/// is considered to occur immediately after the end of `buffer`.
|
||||
/// Otherwise, the stream is expected to continue. `buffer_len` may be zero.
|
||||
/// `buffer` must not be `NULL` but may be undereferencable when
|
||||
/// `buffer_len` is zero.
|
||||
///
|
||||
/// If you're running the detector only on a prefix of a complete
|
||||
/// document, _do not_ pass `last` as `true` after the prefix if the
|
||||
/// stream as a whole still contains more content.
|
||||
///
|
||||
/// Returns `SHIFT_JIS_ENCODING` if the detector guessed
|
||||
/// Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
|
||||
/// guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
|
||||
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
/// `allow_2022` when instantiating the detector). Returns `NULL` if the
|
||||
/// detector is undecided. If `NULL` is returned even when passing `true`
|
||||
/// as `last`, falling back to Shift_JIS is the best guess for Web
|
||||
/// purposes.
|
||||
///
|
||||
/// Do not call again after the function has returned non-`NULL` or after
|
||||
/// the function has been called with `true` as `last`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If called after the function has returned non-`NULL` or after the
|
||||
/// function has been called with `true` as `last`.
|
||||
///
|
||||
/// # Undefined Behavior
|
||||
///
|
||||
/// UB ensues if
|
||||
///
|
||||
/// * `detector` does not point to a detector obtained from
|
||||
/// `shift_or_euc_detector_new` but not yet freed with
|
||||
/// `shift_or_euc_detector_free`.
|
||||
/// * `buffer` is `NULL`.
|
||||
/// * `buffer` and `buffer_len` don't designate a range of memory
|
||||
/// valid for reading.
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn shift_or_euc_detector_feed(
|
||||
detector: *mut Detector,
|
||||
buffer: *const u8,
|
||||
buffer_len: usize,
|
||||
last: bool,
|
||||
) -> *const Encoding {
|
||||
if let Some(encoding) = (*detector).feed(::std::slice::from_raw_parts(buffer, buffer_len), last)
|
||||
{
|
||||
encoding
|
||||
} else {
|
||||
::std::ptr::null()
|
||||
}
|
||||
}
|
|
@ -240,8 +240,6 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) {
|
|||
|
||||
this._mayEnableCharacterEncodingMenu = null;
|
||||
|
||||
this._charsetAutodetected = false;
|
||||
|
||||
this._contentPrincipal = null;
|
||||
|
||||
this._csp = null;
|
||||
|
@ -618,16 +616,6 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) {
|
|||
}
|
||||
}
|
||||
|
||||
get charsetAutodetected() {
|
||||
return this.isRemoteBrowser ? this._charsetAutodetected : this.docShell.charsetAutodetected;
|
||||
}
|
||||
|
||||
set charsetAutodetected(aAutodetected) {
|
||||
if (this.isRemoteBrowser) {
|
||||
this._charsetAutodetected = aAutodetected;
|
||||
}
|
||||
}
|
||||
|
||||
get contentPrincipal() {
|
||||
return this.isRemoteBrowser ? this._contentPrincipal : this.contentDocument.nodePrincipal;
|
||||
}
|
||||
|
@ -1706,7 +1694,6 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) {
|
|||
"_contentTitle",
|
||||
"_characterSet",
|
||||
"_mayEnableCharacterEncodingMenu",
|
||||
"_charsetAutodetected",
|
||||
"_contentPrincipal",
|
||||
"_imageDocument",
|
||||
"_fullZoom",
|
||||
|
|
|
@ -38,7 +38,6 @@ cert_storage = { path = "../../../../security/manager/ssl/cert_storage", optiona
|
|||
bitsdownload = { path = "../../../components/bitsdownload", optional = true }
|
||||
storage = { path = "../../../../storage/rust" }
|
||||
bookmark_sync = { path = "../../../components/places/bookmark_sync", optional = true }
|
||||
shift_or_euc_c = "0.1.0"
|
||||
|
||||
[build-dependencies]
|
||||
rustc_version = "0.2"
|
||||
|
|
|
@ -44,7 +44,6 @@ extern crate bitsdownload;
|
|||
extern crate storage;
|
||||
#[cfg(feature = "moz_places")]
|
||||
extern crate bookmark_sync;
|
||||
extern crate shift_or_euc_c;
|
||||
|
||||
extern crate arrayvec;
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@ charsetMenuAutodet = Auto-Detect
|
|||
charsetMenuAutodet.key = D
|
||||
charsetMenuAutodet.off = (off)
|
||||
charsetMenuAutodet.off.key = o
|
||||
charsetMenuAutodet.ja = Japanese
|
||||
charsetMenuAutodet.ja.key = J
|
||||
charsetMenuAutodet.ru = Russian
|
||||
charsetMenuAutodet.ru.key = R
|
||||
charsetMenuAutodet.uk = Ukrainian
|
||||
|
@ -102,8 +104,25 @@ windows-1255 = Hebrew
|
|||
# sorts right after that one in the collation order for your locale.
|
||||
ISO-8859-8 = Hebrew, Visual
|
||||
|
||||
# Japanese (NOT AN ENCODING NAME)
|
||||
# Japanese
|
||||
Shift_JIS.key = J
|
||||
Shift_JIS = Japanese (Shift_JIS)
|
||||
EUC-JP.key = p
|
||||
EUC-JP = Japanese (EUC-JP)
|
||||
ISO-2022-JP.key = n
|
||||
ISO-2022-JP = Japanese (ISO-2022-JP)
|
||||
|
||||
# UI string in anticipation of bug 1543077; deliberately not in use yet
|
||||
|
||||
# LOCALIZATION NOTE (Japanese.key): If taken into use, this string will appear
|
||||
# instead of the string for Shift_JIS.key, so the use of the same
|
||||
# accelerator is deliberate.
|
||||
Japanese.key = J
|
||||
# LOCALIZATION NOTE (Japanese): If taken into use, this string will appear
|
||||
# as a single item in place of the strings for the three items Shift_JIS,
|
||||
# EUC-JP, and ISO-2022-JP, so this string does not need to make sense together
|
||||
# with those strings and should be translated the way those were
|
||||
# but omitting the part in parentheses.
|
||||
Japanese = Japanese
|
||||
|
||||
# Korean
|
||||
|
|
|
@ -16,16 +16,18 @@ ChromeUtils.defineModuleGetter(this, "Deprecated",
|
|||
|
||||
const kAutoDetectors = [
|
||||
["off", ""],
|
||||
["ja", "ja_parallel_state_machine"],
|
||||
["ru", "ruprob"],
|
||||
["uk", "ukprob"],
|
||||
];
|
||||
|
||||
/**
|
||||
* This set contains encodings that are in the Encoding Standard, except:
|
||||
* - Japanese encodings are represented by one autodetection item
|
||||
* - XSS-dangerous encodings (except ISO-2022-JP which is assumed to be
|
||||
* too common not to be included).
|
||||
* - x-user-defined, which practically never makes sense as an end-user-chosen
|
||||
* override.
|
||||
* - Encodings that IE11 doesn't have in its corresponding menu.
|
||||
* - Encodings that IE11 doesn't have in its corresponding menu.
|
||||
*/
|
||||
const kEncodings = new Set([
|
||||
// Globally relevant
|
||||
|
@ -58,8 +60,10 @@ const kEncodings = new Set([
|
|||
// Hebrew
|
||||
"windows-1255",
|
||||
"ISO-8859-8",
|
||||
// Japanese (NOT AN ENCODING NAME)
|
||||
"Japanese",
|
||||
// Japanese
|
||||
"Shift_JIS",
|
||||
"EUC-JP",
|
||||
"ISO-2022-JP",
|
||||
// Korean
|
||||
"EUC-KR",
|
||||
// Thai
|
||||
|
@ -91,7 +95,8 @@ function CharsetComparator(a, b) {
|
|||
// happens to make the less frequently-used items first.
|
||||
let titleA = a.label.replace(/\(.*/, "") + b.value;
|
||||
let titleB = b.label.replace(/\(.*/, "") + a.value;
|
||||
// Secondarily reverse sort by encoding name to sort "windows"
|
||||
// Secondarily reverse sort by encoding name to sort "windows" or
|
||||
// "shift_jis" first.
|
||||
return titleA.localeCompare(titleB) || b.value.localeCompare(a.value);
|
||||
}
|
||||
|
||||
|
@ -234,17 +239,7 @@ var CharsetMenu = {
|
|||
* For substantially similar encodings, treat two encodings as the same
|
||||
* for the purpose of the check mark.
|
||||
*/
|
||||
foldCharset(charset, isAutodetected) {
|
||||
if (isAutodetected) {
|
||||
switch (charset) {
|
||||
case "Shift_JIS":
|
||||
case "EUC-JP":
|
||||
case "ISO-2022-JP":
|
||||
return "Japanese";
|
||||
default:
|
||||
// fall through
|
||||
}
|
||||
}
|
||||
foldCharset(charset) {
|
||||
switch (charset) {
|
||||
case "ISO-8859-8-I":
|
||||
return "windows-1255";
|
||||
|
@ -257,11 +252,8 @@ var CharsetMenu = {
|
|||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* This method is for comm-central callers only.
|
||||
*/
|
||||
update(parent, charset) {
|
||||
let menuitem = parent.getElementsByAttribute("charset", this.foldCharset(charset, false)).item(0);
|
||||
let menuitem = parent.getElementsByAttribute("charset", this.foldCharset(charset)).item(0);
|
||||
if (menuitem) {
|
||||
menuitem.setAttribute("checked", "true");
|
||||
}
|
||||
|
|
|
@ -197,7 +197,6 @@ class RemoteWebProgressManager {
|
|||
if (json.charset) {
|
||||
this._browser._characterSet = json.charset;
|
||||
this._browser._mayEnableCharacterEncodingMenu = json.mayEnableCharacterEncodingMenu;
|
||||
this._browser._charsetAutodetected = json.charsetAutodetected;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -116,7 +116,6 @@ class WebProgressChild {
|
|||
json.title = this.mm.content.document.title;
|
||||
json.charset = this.mm.content.document.characterSet;
|
||||
json.mayEnableCharacterEncodingMenu = this.mm.docShell.mayEnableCharacterEncodingMenu;
|
||||
json.charsetAutodetected = this.mm.docShell.charsetAutodetected;
|
||||
json.principal = this.mm.content.document.nodePrincipal;
|
||||
let csp = this.mm.content.document.csp;
|
||||
json.csp = E10SUtils.serializeCSP(csp);
|
||||
|
|
Загрузка…
Ссылка в новой задаче