зеркало из https://github.com/mozilla/gecko-dev.git
Merge mozilla-inbound to mozilla-central. a=merge
This commit is contained in:
Коммит
e13e0af55b
|
@ -1009,21 +1009,21 @@ name = "encoding_c"
|
|||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_glue"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"nserror 0.1.0",
|
||||
"nsstring 0.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.16"
|
||||
version = "0.8.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1299,6 +1299,7 @@ dependencies = [
|
|||
"profiler_helper 0.1.0",
|
||||
"rsdparsa_capi 0.1.0",
|
||||
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"shift_or_euc_c 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"storage 0.1.0",
|
||||
"webrender_bindings 0.1.0",
|
||||
"xpcom 0.1.0",
|
||||
|
@ -2017,7 +2018,7 @@ name = "nsstring"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2773,6 +2774,24 @@ dependencies = [
|
|||
"opaque-debug 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shift_or_euc"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shift_or_euc_c"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"shift_or_euc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "0.1.1"
|
||||
|
@ -3805,7 +3824,7 @@ dependencies = [
|
|||
"checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a"
|
||||
"checksum ena 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "25b4e5febb25f08c49f1b07dc33a182729a6b21edfb562b5aef95f78e0dbe5bb"
|
||||
"checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee"
|
||||
"checksum encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73"
|
||||
"checksum encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)" = "4155785c79f2f6701f185eb2e6b4caf0555ec03477cb4c70db67b465311620ed"
|
||||
"checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad"
|
||||
"checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3"
|
||||
"checksum euclid 0.19.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d1a7698bdda3d7444a79d33bdc96e8b518d44ea3ff101d8492a6ca1207b886ea"
|
||||
|
@ -3967,6 +3986,8 @@ dependencies = [
|
|||
"checksum serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "44dd2cfde475037451fa99b7e5df77aa3cfd1536575fa8e7a538ab36dcde49ae"
|
||||
"checksum sha-1 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "23962131a91661d643c98940b20fcaffe62d776a823247be80a48fcb8b6fce68"
|
||||
"checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d"
|
||||
"checksum shift_or_euc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"
|
||||
"checksum shift_or_euc_c 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"
|
||||
"checksum shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
|
||||
"checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84"
|
||||
"checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23"
|
||||
|
|
|
@ -6615,6 +6615,9 @@ function handleDroppedLink(event, urlOrLinks, nameOrTriggeringPrincipal, trigger
|
|||
|
||||
function BrowserSetForcedCharacterSet(aCharset) {
|
||||
if (aCharset) {
|
||||
if (aCharset == "Japanese") {
|
||||
aCharset = "Shift_JIS";
|
||||
}
|
||||
gBrowser.selectedBrowser.characterSet = aCharset;
|
||||
// Save the forced character-set
|
||||
PlacesUIUtils.setCharsetForPage(getWebNavigation().currentURI,
|
||||
|
@ -6629,7 +6632,8 @@ function BrowserCharsetReload() {
|
|||
}
|
||||
|
||||
function UpdateCurrentCharset(target) {
|
||||
let selectedCharset = CharsetMenu.foldCharset(gBrowser.selectedBrowser.characterSet);
|
||||
let selectedCharset = CharsetMenu.foldCharset(gBrowser.selectedBrowser.characterSet,
|
||||
gBrowser.selectedBrowser.charsetAutodetected);
|
||||
for (let menuItem of target.getElementsByTagName("menuitem")) {
|
||||
let isSelected = menuItem.getAttribute("charset") === selectedCharset;
|
||||
menuItem.setAttribute("checked", isSelected);
|
||||
|
|
|
@ -358,7 +358,8 @@ const CustomizableWidgets = [
|
|||
},
|
||||
updateCurrentCharset(aDocument) {
|
||||
let currentCharset = aDocument.defaultView.gBrowser.selectedBrowser.characterSet;
|
||||
currentCharset = CharsetMenu.foldCharset(currentCharset);
|
||||
let {charsetAutodetected} = aDocument.defaultView.gBrowser.selectedBrowser;
|
||||
currentCharset = CharsetMenu.foldCharset(currentCharset, charsetAutodetected);
|
||||
|
||||
let pinnedContainer = aDocument.getElementById("PanelUI-characterEncodingView-pinned");
|
||||
let charsetContainer = aDocument.getElementById("PanelUI-characterEncodingView-charsets");
|
||||
|
|
|
@ -1276,7 +1276,6 @@ nsDocShell::GatherCharsetMenuTelemetry() {
|
|||
case kCharsetFromDocTypeDefault:
|
||||
case kCharsetFromCache:
|
||||
case kCharsetFromParentFrame:
|
||||
case kCharsetFromHintPrevDoc:
|
||||
// Changing charset on an unlabeled doc.
|
||||
if (isFileURL) {
|
||||
Telemetry::AccumulateCategorical(
|
||||
|
@ -1892,6 +1891,26 @@ nsDocShell::GetMayEnableCharacterEncodingMenu(
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
nsDocShell::GetCharsetAutodetected(bool* aCharsetAutodetected) {
|
||||
*aCharsetAutodetected = false;
|
||||
if (!mContentViewer) {
|
||||
return NS_OK;
|
||||
}
|
||||
Document* doc = mContentViewer->GetDocument();
|
||||
if (!doc) {
|
||||
return NS_OK;
|
||||
}
|
||||
int32_t source = doc->GetDocumentCharacterSetSource();
|
||||
|
||||
if (source == kCharsetFromAutoDetection ||
|
||||
source == kCharsetFromUserForcedAutoDetection) {
|
||||
*aCharsetAutodetected = true;
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
nsDocShell::GetDocShellEnumerator(int32_t aItemType,
|
||||
DocShellEnumeratorDirection aDirection,
|
||||
|
@ -8372,11 +8391,11 @@ nsresult nsDocShell::SetupNewViewer(nsIContentViewer* aNewViewer) {
|
|||
|
||||
const Encoding* forceCharset = nullptr;
|
||||
const Encoding* hintCharset = nullptr;
|
||||
int32_t hintCharsetSource;
|
||||
float textZoom;
|
||||
float pageZoom;
|
||||
float overrideDPPX;
|
||||
bool styleDisabled;
|
||||
int32_t hintCharsetSource = kCharsetUninitialized;
|
||||
float textZoom = 1.0;
|
||||
float pageZoom = 1.0;
|
||||
float overrideDPPX = 1.0;
|
||||
bool styleDisabled = false;
|
||||
// |newMUDV| also serves as a flag to set the data from the above vars
|
||||
nsCOMPtr<nsIContentViewer> newCv;
|
||||
|
||||
|
@ -10188,6 +10207,8 @@ nsresult nsDocShell::DoURILoad(nsDocShellLoadState* aLoadState,
|
|||
MOZ_ASSERT(NS_SUCCEEDED(rv));
|
||||
}
|
||||
|
||||
Unused << rv; // Keep Coverity happy
|
||||
|
||||
nsCOMPtr<nsIWritablePropertyBag2> props(do_QueryInterface(channel));
|
||||
if (props) {
|
||||
// save true referrer for those who need it (e.g. xpinstall whitelisting)
|
||||
|
|
|
@ -878,6 +878,11 @@ interface nsIDocShell : nsIDocShellTreeItem
|
|||
*/
|
||||
[infallible] readonly attribute boolean mayEnableCharacterEncodingMenu;
|
||||
|
||||
/**
|
||||
* Indicates that the character encoding was autodetected.
|
||||
*/
|
||||
[infallible] readonly attribute boolean charsetAutodetected;
|
||||
|
||||
attribute nsIEditor editor;
|
||||
readonly attribute boolean editable; /* this docShell is editable */
|
||||
readonly attribute boolean hasEditingSession; /* this docShell has an editing session */
|
||||
|
|
|
@ -44,6 +44,14 @@ support-files =
|
|||
file_bug1415918_beforeunload_iframe_2.html
|
||||
file_bug1415918_beforeunload_iframe.html
|
||||
file_bug1415918_beforeunload.html
|
||||
file_bug1543077-1-child.html
|
||||
file_bug1543077-1.html
|
||||
file_bug1543077-2-child.html
|
||||
file_bug1543077-2.html
|
||||
file_bug1543077-3-child.html
|
||||
file_bug1543077-3.html
|
||||
file_bug1543077-4-child.html
|
||||
file_bug1543077-4.html
|
||||
file_multiple_pushState.html
|
||||
print_postdata.sjs
|
||||
test-form_sjis.html
|
||||
|
@ -60,6 +68,10 @@ support-files =
|
|||
onpageshow_message.html
|
||||
file_cross_process_csp_inheritance.html
|
||||
|
||||
[browser_bug1543077-1.js]
|
||||
[browser_bug1543077-2.js]
|
||||
[browser_bug1543077-3.js]
|
||||
[browser_bug1543077-4.js]
|
||||
[browser_bug1206879.js]
|
||||
[browser_bug1309900_crossProcessHistoryNavigation.js]
|
||||
[browser_bug1328501.js]
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(rootDir + "file_bug1543077-1.html", afterOpen, "Japanese", afterChangeCharset);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u00A4"), 131, "Parent doc should be windows-1252 initially");
|
||||
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u00A4"), 87, "Child doc should be windows-1252 initially");
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u3042"), 131, "Parent doc should decode as EUC-JP subsequently");
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 87, "Child doc should decode as EUC-JP subsequently");
|
||||
|
||||
is(content.document.characterSet, "EUC-JP", "Parent doc should report EUC-JP subsequently");
|
||||
is(content.frames[0].document.characterSet, "EUC-JP", "Child doc should report EUC-JP subsequently");
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(rootDir + "file_bug1543077-2.html", afterOpen, "Japanese", afterChangeCharset);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u201A"), 134, "Parent doc should be windows-1252 initially");
|
||||
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u201A"), 90, "Child doc should be windows-1252 initially");
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u3042"), 134, "Parent doc should decode as Shift_JIS subsequently");
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 90, "Child doc should decode as Shift_JIS subsequently");
|
||||
|
||||
is(content.document.characterSet, "Shift_JIS", "Parent doc should report Shift_JIS subsequently");
|
||||
is(content.frames[0].document.characterSet, "Shift_JIS", "Child doc should report Shift_JIS subsequently");
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(rootDir + "file_bug1543077-3.html", afterOpen, "Japanese", afterChangeCharset);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u001B"), 136, "Parent doc should be windows-1252 initially");
|
||||
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u001B"), 92, "Child doc should be windows-1252 initially");
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u3042"), 136, "Parent doc should decode as ISO-2022-JP subsequently");
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 92, "Child doc should decode as ISO-2022-JP subsequently");
|
||||
|
||||
is(content.document.characterSet, "ISO-2022-JP", "Parent doc should report ISO-2022-JP subsequently");
|
||||
is(content.frames[0].document.characterSet, "ISO-2022-JP", "Child doc should report ISO-2022-JP subsequently");
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
function test() {
|
||||
var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
|
||||
runCharsetTest(rootDir + "file_bug1543077-4.html", afterOpen, "Japanese", afterChangeCharset);
|
||||
}
|
||||
|
||||
function afterOpen() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u00A4"), 131, "Parent doc should be windows-1252 initially");
|
||||
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u201A"), 90, "Child doc should be windows-1252 initially");
|
||||
}
|
||||
|
||||
function afterChangeCharset() {
|
||||
is(content.document.documentElement.textContent.indexOf("\u3042"), 131, "Parent doc should decode as EUC-JP subsequently");
|
||||
is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 90, "Child doc should decode as Shift_JIS subsequently");
|
||||
|
||||
is(content.document.characterSet, "EUC-JP", "Parent doc should report EUC-JP subsequently");
|
||||
is(content.frames[0].document.characterSet, "Shift_JIS", "Child doc should report Shift_JIS subsequently");
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
|
||||
|
||||
<iframe src="file_bug1543077-1-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as Shift_JIS: ‚ </p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as Shift_JIS: ‚ </p>
|
||||
|
||||
<iframe src="file_bug1543077-2-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as ISO-2022-JP: $B$"(B</p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as ISO-2022-JP: $B$"(B</p>
|
||||
|
||||
<iframe src="file_bug1543077-3-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Hiragana letter a if decoded as Shift_JIS: ‚ </p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta content="width=device-width, initial-scale=1" name="viewport">
|
||||
<title>No encoding declaration in parent or child</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>No encoding declaration in parent or child</h1>
|
||||
|
||||
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
|
||||
|
||||
<iframe src="file_bug1543077-4-child.html"></iframe>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -93,6 +93,12 @@ interface nsIBrowser : nsISupports
|
|||
*/
|
||||
attribute boolean mayEnableCharacterEncodingMenu;
|
||||
|
||||
/**
|
||||
* Whether or not the character encoding was detected by analyzing
|
||||
* content (as opposed to reading a protocol label).
|
||||
*/
|
||||
attribute boolean charsetAutodetected;
|
||||
|
||||
/**
|
||||
* Called by Gecko to update the browser when its state changes.
|
||||
*
|
||||
|
|
|
@ -3539,6 +3539,8 @@ NS_IMETHODIMP BrowserChild::OnStateChange(nsIWebProgress* aWebProgress,
|
|||
stateChangeData->isNavigating() = docShell->GetIsNavigating();
|
||||
stateChangeData->mayEnableCharacterEncodingMenu() =
|
||||
docShell->GetMayEnableCharacterEncodingMenu();
|
||||
stateChangeData->charsetAutodetected() =
|
||||
docShell->GetCharsetAutodetected();
|
||||
|
||||
if (document && aStateFlags & nsIWebProgressListener::STATE_STOP) {
|
||||
document->GetContentType(stateChangeData->contentType());
|
||||
|
|
|
@ -2394,6 +2394,8 @@ mozilla::ipc::IPCResult BrowserParent::RecvOnStateChange(
|
|||
Unused << browser->SetIsNavigating(aStateChangeData->isNavigating());
|
||||
Unused << browser->SetMayEnableCharacterEncodingMenu(
|
||||
aStateChangeData->mayEnableCharacterEncodingMenu());
|
||||
Unused << browser->SetCharsetAutodetected(
|
||||
aStateChangeData->charsetAutodetected());
|
||||
Unused << browser->UpdateForStateChange(aStateChangeData->charset(),
|
||||
aStateChangeData->documentURI(),
|
||||
aStateChangeData->contentType());
|
||||
|
|
|
@ -123,6 +123,7 @@ struct WebProgressStateChangeData
|
|||
{
|
||||
bool isNavigating;
|
||||
bool mayEnableCharacterEncodingMenu;
|
||||
bool charsetAutodetected;
|
||||
|
||||
// The following fields are only set when the aStateFlags param passed with
|
||||
// this struct is |nsIWebProgress.STATE_STOP|.
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
DIRS += ['src']
|
||||
TEST_DIRS += ['tests']
|
||||
|
||||
with Files('**'):
|
||||
|
|
|
@ -1,41 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "CharDistribution.h"
|
||||
|
||||
#include "JISFreq.tab"
|
||||
#include "mozilla/ArrayUtils.h"
|
||||
|
||||
#define SURE_YES 0.99f
|
||||
#define SURE_NO 0.01f
|
||||
|
||||
// return confidence base on received data
|
||||
float CharDistributionAnalysis::GetConfidence(void) {
|
||||
// if we didn't receive any character in our consideration range, or the
|
||||
// number of frequent characters is below the minimum threshold, return
|
||||
// negative answer
|
||||
if (mTotalChars <= 0 || mFreqChars <= mDataThreshold) return SURE_NO;
|
||||
|
||||
if (mTotalChars != mFreqChars) {
|
||||
float r =
|
||||
mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
|
||||
|
||||
if (r < SURE_YES) return r;
|
||||
}
|
||||
// normalize confidence, (we don't want to be 100% sure)
|
||||
return SURE_YES;
|
||||
}
|
||||
|
||||
SJISDistributionAnalysis::SJISDistributionAnalysis() {
|
||||
mCharToFreqOrder = JISCharToFreqOrder;
|
||||
mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
|
||||
mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
|
||||
}
|
||||
|
||||
EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() {
|
||||
mCharToFreqOrder = JISCharToFreqOrder;
|
||||
mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
|
||||
mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
|
||||
}
|
|
@ -1,201 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef CharDistribution_h__
|
||||
#define CharDistribution_h__
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
#define ENOUGH_DATA_THRESHOLD 1024
|
||||
|
||||
class CharDistributionAnalysis {
|
||||
public:
|
||||
CharDistributionAnalysis() { Reset(); }
|
||||
|
||||
// feed a block of data and do distribution analysis
|
||||
void HandleData(const char* aBuf, uint32_t aLen) {}
|
||||
|
||||
// Feed a character with known length
|
||||
void HandleOneChar(const char* aStr, uint32_t aCharLen) {
|
||||
int32_t order;
|
||||
|
||||
// we only care about 2-bytes character in our distribution analysis
|
||||
order = (aCharLen == 2) ? GetOrder(aStr) : -1;
|
||||
|
||||
if (order >= 0) {
|
||||
mTotalChars++;
|
||||
// order is valid
|
||||
if ((uint32_t)order < mTableSize) {
|
||||
if (512 > mCharToFreqOrder[order]) mFreqChars++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// return confidence base on existing data
|
||||
float GetConfidence(void);
|
||||
|
||||
// Reset analyser, clear any state
|
||||
void Reset() {
|
||||
mDone = false;
|
||||
mTotalChars = 0;
|
||||
mFreqChars = 0;
|
||||
mDataThreshold = 0;
|
||||
}
|
||||
|
||||
// It is not necessary to receive all data to draw conclusion. For charset
|
||||
// detection,
|
||||
// certain amount of data is enough
|
||||
bool GotEnoughData() { return mTotalChars > ENOUGH_DATA_THRESHOLD; }
|
||||
|
||||
protected:
|
||||
// we do not handle character base on its original encoding string, but
|
||||
// convert this encoding string to a number, here called order.
|
||||
// This allow multiple encoding of a language to share one frequency table
|
||||
virtual int32_t GetOrder(const char* str) { return -1; }
|
||||
|
||||
// If this flag is set to true, detection is done and conclusion has been made
|
||||
bool mDone;
|
||||
|
||||
// The number of characters whose frequency order is less than 512
|
||||
uint32_t mFreqChars;
|
||||
|
||||
// Total character encounted.
|
||||
uint32_t mTotalChars;
|
||||
|
||||
// Number of hi-byte characters needed to trigger detection
|
||||
uint32_t mDataThreshold;
|
||||
|
||||
// Mapping table to get frequency order from char order (get from GetOrder())
|
||||
const int16_t* mCharToFreqOrder;
|
||||
|
||||
// Size of above table
|
||||
uint32_t mTableSize;
|
||||
|
||||
// This is a constant value varies from language to language, it is used in
|
||||
// calculating confidence. See my paper for further detail.
|
||||
float mTypicalDistributionRatio;
|
||||
};
|
||||
|
||||
class EUCTWDistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
EUCTWDistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for euc-TW encoding, we are interested
|
||||
// first byte range: 0xc4 -- 0xfe
|
||||
// second byte range: 0xa1 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
if ((unsigned char)*str >= (unsigned char)0xc4)
|
||||
return 94 * ((unsigned char)str[0] - (unsigned char)0xc4) +
|
||||
(unsigned char)str[1] - (unsigned char)0xa1;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
class EUCKRDistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
EUCKRDistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for euc-KR encoding, we are interested
|
||||
// first byte range: 0xb0 -- 0xfe
|
||||
// second byte range: 0xa1 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
if ((unsigned char)*str >= (unsigned char)0xb0)
|
||||
return 94 * ((unsigned char)str[0] - (unsigned char)0xb0) +
|
||||
(unsigned char)str[1] - (unsigned char)0xa1;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
class GB2312DistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
GB2312DistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for GB2312 encoding, we are interested
|
||||
// first byte range: 0xb0 -- 0xfe
|
||||
// second byte range: 0xa1 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
if ((unsigned char)*str >= (unsigned char)0xb0 &&
|
||||
(unsigned char)str[1] >= (unsigned char)0xa1)
|
||||
return 94 * ((unsigned char)str[0] - (unsigned char)0xb0) +
|
||||
(unsigned char)str[1] - (unsigned char)0xa1;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
class Big5DistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
Big5DistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for big5 encoding, we are interested
|
||||
// first byte range: 0xa4 -- 0xfe
|
||||
// second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
if ((unsigned char)*str >= (unsigned char)0xa4)
|
||||
if ((unsigned char)str[1] >= (unsigned char)0xa1)
|
||||
return 157 * ((unsigned char)str[0] - (unsigned char)0xa4) +
|
||||
(unsigned char)str[1] - (unsigned char)0xa1 + 63;
|
||||
else
|
||||
return 157 * ((unsigned char)str[0] - (unsigned char)0xa4) +
|
||||
(unsigned char)str[1] - (unsigned char)0x40;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
class SJISDistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
SJISDistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for sjis encoding, we are interested
|
||||
// first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
|
||||
// second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
int32_t order;
|
||||
if ((unsigned char)*str >= (unsigned char)0x81 &&
|
||||
(unsigned char)*str <= (unsigned char)0x9f)
|
||||
order = 188 * ((unsigned char)str[0] - (unsigned char)0x81);
|
||||
else if ((unsigned char)*str >= (unsigned char)0xe0 &&
|
||||
(unsigned char)*str <= (unsigned char)0xef)
|
||||
order = 188 * ((unsigned char)str[0] - (unsigned char)0xe0 + 31);
|
||||
else
|
||||
return -1;
|
||||
order += (unsigned char)*(str + 1) - 0x40;
|
||||
if ((unsigned char)str[1] > (unsigned char)0x7f) order--;
|
||||
return order;
|
||||
}
|
||||
};
|
||||
|
||||
class EUCJPDistributionAnalysis : public CharDistributionAnalysis {
|
||||
public:
|
||||
EUCJPDistributionAnalysis();
|
||||
|
||||
protected:
|
||||
// for euc-JP encoding, we are interested
|
||||
// first byte range: 0xa0 -- 0xfe
|
||||
// second byte range: 0xa1 -- 0xfe
|
||||
// no validation needed here. State machine has done that
|
||||
int32_t GetOrder(const char* str) override {
|
||||
if ((unsigned char)*str >= (unsigned char)0xa0)
|
||||
return 94 * ((unsigned char)str[0] - (unsigned char)0xa1) +
|
||||
(unsigned char)str[1] - (unsigned char)0xa1;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // CharDistribution_h__
|
|
@ -1,554 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
//Sampling from about 20M text materials include literature and computer technology
|
||||
|
||||
// Japanese frequency table, applied to both S-JIS and EUC-JP
|
||||
//They are sorted in order.
|
||||
|
||||
/******************************************************************************
|
||||
* 128 --> 0.77094
|
||||
* 256 --> 0.85710
|
||||
* 512 --> 0.92635
|
||||
* 1024 --> 0.97130
|
||||
* 2048 --> 0.99431
|
||||
*
|
||||
* Idea Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
|
||||
* Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191
|
||||
*
|
||||
* Typical Distribution Ratio, 25% of IDR
|
||||
*****************************************************************************/
|
||||
|
||||
#define JIS_TYPICAL_DISTRIBUTION_RATIO (float) 3.0
|
||||
|
||||
// Char to FreqOrder table
|
||||
static const int16_t JISCharToFreqOrder[] =
|
||||
{
|
||||
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, // 16
|
||||
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, // 32
|
||||
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, // 48
|
||||
2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, // 64
|
||||
2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, // 80
|
||||
5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, // 96
|
||||
1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, // 112
|
||||
5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, // 128
|
||||
5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, // 144
|
||||
5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, // 160
|
||||
5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, // 176
|
||||
5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, // 192
|
||||
5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, // 208
|
||||
1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, // 224
|
||||
1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, // 240
|
||||
1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, // 256
|
||||
2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, // 272
|
||||
3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, // 288
|
||||
3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, // 304
|
||||
4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, // 320
|
||||
12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, // 336
|
||||
1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, // 352
|
||||
109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, // 368
|
||||
5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, // 384
|
||||
271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, // 400
|
||||
32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, // 416
|
||||
43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, // 432
|
||||
280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, // 448
|
||||
54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, // 464
|
||||
5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, // 480
|
||||
5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, // 496
|
||||
5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, // 512
|
||||
4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, // 528
|
||||
5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, // 544
|
||||
5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, // 560
|
||||
5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, // 576
|
||||
5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, // 592
|
||||
5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, // 608
|
||||
5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, // 624
|
||||
5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, // 640
|
||||
5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, // 656
|
||||
5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, // 672
|
||||
3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, // 688
|
||||
5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, // 704
|
||||
5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, // 720
|
||||
5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, // 736
|
||||
5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, // 752
|
||||
5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, // 768
|
||||
5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, // 784
|
||||
5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, // 800
|
||||
5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, // 816
|
||||
5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, // 832
|
||||
5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, // 848
|
||||
5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, // 864
|
||||
5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, // 880
|
||||
5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, // 896
|
||||
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, // 912
|
||||
5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, // 928
|
||||
5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, // 944
|
||||
5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, // 960
|
||||
5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, // 976
|
||||
5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, // 992
|
||||
5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, // 1008
|
||||
5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, // 1024
|
||||
5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, // 1040
|
||||
5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, // 1056
|
||||
5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, // 1072
|
||||
5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, // 1088
|
||||
5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, // 1104
|
||||
5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, // 1120
|
||||
5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, // 1136
|
||||
5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, // 1152
|
||||
5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, // 1168
|
||||
5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, // 1184
|
||||
5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, // 1200
|
||||
5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, // 1216
|
||||
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, // 1232
|
||||
5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, // 1248
|
||||
5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, // 1264
|
||||
5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, // 1280
|
||||
5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, // 1296
|
||||
6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, // 1312
|
||||
6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, // 1328
|
||||
6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, // 1344
|
||||
6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, // 1360
|
||||
6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, // 1376
|
||||
6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, // 1392
|
||||
6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, // 1408
|
||||
6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, // 1424
|
||||
4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, // 1440
|
||||
854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, // 1456
|
||||
665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, // 1472
|
||||
1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, // 1488
|
||||
1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, // 1504
|
||||
896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, // 1520
|
||||
3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, // 1536
|
||||
3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, // 1552
|
||||
804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, // 1568
|
||||
3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, // 1584
|
||||
3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, // 1600
|
||||
586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, // 1616
|
||||
2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, // 1632
|
||||
277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, // 1648
|
||||
3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, // 1664
|
||||
1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, // 1680
|
||||
380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, // 1696
|
||||
1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, // 1712
|
||||
850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, // 1728
|
||||
2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, // 1744
|
||||
2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, // 1760
|
||||
2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, // 1776
|
||||
2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, // 1792
|
||||
1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, // 1808
|
||||
1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, // 1824
|
||||
1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, // 1840
|
||||
1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, // 1856
|
||||
2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, // 1872
|
||||
1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, // 1888
|
||||
2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, // 1904
|
||||
1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, // 1920
|
||||
1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, // 1936
|
||||
1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, // 1952
|
||||
1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, // 1968
|
||||
1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, // 1984
|
||||
1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, // 2000
|
||||
606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, // 2016
|
||||
684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, // 2032
|
||||
1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, // 2048
|
||||
2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, // 2064
|
||||
2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, // 2080
|
||||
2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, // 2096
|
||||
3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, // 2112
|
||||
3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, // 2128
|
||||
884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, // 2144
|
||||
3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, // 2160
|
||||
1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, // 2176
|
||||
861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, // 2192
|
||||
2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, // 2208
|
||||
1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, // 2224
|
||||
576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, // 2240
|
||||
3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, // 2256
|
||||
4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, // 2272
|
||||
2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, // 2288
|
||||
1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, // 2304
|
||||
2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, // 2320
|
||||
1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, // 2336
|
||||
385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, // 2352
|
||||
178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, // 2368
|
||||
1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, // 2384
|
||||
2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, // 2400
|
||||
2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, // 2416
|
||||
2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, // 2432
|
||||
3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, // 2448
|
||||
1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, // 2464
|
||||
2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, // 2480
|
||||
359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, // 2496
|
||||
837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, // 2512
|
||||
855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, // 2528
|
||||
1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, // 2544
|
||||
2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, // 2560
|
||||
633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, // 2576
|
||||
1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, // 2592
|
||||
1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, // 2608
|
||||
353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, // 2624
|
||||
1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, // 2640
|
||||
1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, // 2656
|
||||
1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, // 2672
|
||||
764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, // 2688
|
||||
2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, // 2704
|
||||
278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, // 2720
|
||||
2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, // 2736
|
||||
3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, // 2752
|
||||
2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, // 2768
|
||||
1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, // 2784
|
||||
6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, // 2800
|
||||
1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, // 2816
|
||||
2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, // 2832
|
||||
1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, // 2848
|
||||
470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, // 2864
|
||||
72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, // 2880
|
||||
3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, // 2896
|
||||
3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, // 2912
|
||||
1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, // 2928
|
||||
1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, // 2944
|
||||
1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, // 2960
|
||||
1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, // 2976
|
||||
123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, // 2992
|
||||
913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, // 3008
|
||||
2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, // 3024
|
||||
900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, // 3040
|
||||
3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, // 3056
|
||||
2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, // 3072
|
||||
423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, // 3088
|
||||
1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, // 3104
|
||||
2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, // 3120
|
||||
220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, // 3136
|
||||
1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, // 3152
|
||||
745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, // 3168
|
||||
4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, // 3184
|
||||
2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, // 3200
|
||||
1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, // 3216
|
||||
666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, // 3232
|
||||
1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, // 3248
|
||||
2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, // 3264
|
||||
376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, // 3280
|
||||
6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, // 3296
|
||||
1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, // 3312
|
||||
1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, // 3328
|
||||
2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, // 3344
|
||||
3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, // 3360
|
||||
914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, // 3376
|
||||
3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, // 3392
|
||||
1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, // 3408
|
||||
674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, // 3424
|
||||
1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, // 3440
|
||||
199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, // 3456
|
||||
3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, // 3472
|
||||
370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, // 3488
|
||||
2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, // 3504
|
||||
414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, // 3520
|
||||
4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, // 3536
|
||||
2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, // 3552
|
||||
1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, // 3568
|
||||
1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, // 3584
|
||||
1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, // 3600
|
||||
166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, // 3616
|
||||
1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, // 3632
|
||||
3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, // 3648
|
||||
1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, // 3664
|
||||
3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, // 3680
|
||||
264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, // 3696
|
||||
543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, // 3712
|
||||
983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, // 3728
|
||||
2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, // 3744
|
||||
1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, // 3760
|
||||
867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, // 3776
|
||||
1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, // 3792
|
||||
894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, // 3808
|
||||
1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, // 3824
|
||||
530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, // 3840
|
||||
839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, // 3856
|
||||
480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, // 3872
|
||||
1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, // 3888
|
||||
1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, // 3904
|
||||
2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, // 3920
|
||||
4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, // 3936
|
||||
227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, // 3952
|
||||
1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, // 3968
|
||||
328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, // 3984
|
||||
1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, // 4000
|
||||
3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, // 4016
|
||||
1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, // 4032
|
||||
2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, // 4048
|
||||
2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, // 4064
|
||||
1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, // 4080
|
||||
1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, // 4096
|
||||
2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, // 4112
|
||||
455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, // 4128
|
||||
2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, // 4144
|
||||
1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, // 4160
|
||||
1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, // 4176
|
||||
1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, // 4192
|
||||
1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, // 4208
|
||||
3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, // 4224
|
||||
2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, // 4240
|
||||
2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, // 4256
|
||||
575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, // 4272
|
||||
3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, // 4288
|
||||
3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, // 4304
|
||||
1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, // 4320
|
||||
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, // 4336
|
||||
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, // 4352
|
||||
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, // 4368 //last 512
|
||||
|
||||
/***************************************************************************************
|
||||
*Everything below is of no interest for detection purpose *
|
||||
***************************************************************************************
|
||||
|
||||
2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, // 4384
|
||||
6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, // 4400
|
||||
6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, // 4416
|
||||
6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, // 4432
|
||||
6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, // 4448
|
||||
4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, // 4464
|
||||
4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, // 4480
|
||||
3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, // 4496
|
||||
3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, // 4512
|
||||
4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, // 4528
|
||||
3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, // 4544
|
||||
6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, // 4560
|
||||
4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, // 4576
|
||||
6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, // 4592
|
||||
6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, // 4608
|
||||
6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, // 4624
|
||||
6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, // 4640
|
||||
6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, // 4656
|
||||
6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, // 4672
|
||||
3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, // 4688
|
||||
3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, // 4704
|
||||
6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, // 4720
|
||||
2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, // 4736
|
||||
4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, // 4752
|
||||
4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, // 4768
|
||||
4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, // 4784
|
||||
6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, // 4800
|
||||
3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, // 4816
|
||||
4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, // 4832
|
||||
4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, // 4848
|
||||
6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, // 4864
|
||||
4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, // 4880
|
||||
6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, // 4896
|
||||
3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, // 4912
|
||||
2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, // 4928
|
||||
4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, // 4944
|
||||
2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, // 4960
|
||||
6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, // 4976
|
||||
4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, // 4992
|
||||
6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, // 5008
|
||||
6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, // 5024
|
||||
6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, // 5040
|
||||
4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, // 5056
|
||||
6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, // 5072
|
||||
2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, // 5088
|
||||
6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, // 5104
|
||||
4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, // 5120
|
||||
6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, // 5136
|
||||
4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, // 5152
|
||||
4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, // 5168
|
||||
6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, // 5184
|
||||
6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, // 5200
|
||||
6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, // 5216
|
||||
3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, // 5232
|
||||
1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, // 5248
|
||||
3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, // 5264
|
||||
3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, // 5280
|
||||
4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, // 5296
|
||||
6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, // 5312
|
||||
3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, // 5328
|
||||
6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, // 5344
|
||||
3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, // 5360
|
||||
3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, // 5376
|
||||
2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, // 5392
|
||||
6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, // 5408
|
||||
6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, // 5424
|
||||
3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, // 5440
|
||||
6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, // 5456
|
||||
3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, // 5472
|
||||
6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, // 5488
|
||||
6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, // 5504
|
||||
6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, // 5520
|
||||
4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, // 5536
|
||||
6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, // 5552
|
||||
4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, // 5568
|
||||
3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, // 5584
|
||||
3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, // 5600
|
||||
6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, // 5616
|
||||
6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, // 5632
|
||||
4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, // 5648
|
||||
6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, // 5664
|
||||
6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, // 5680
|
||||
6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, // 5696
|
||||
6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, // 5712
|
||||
6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, // 5728
|
||||
6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, // 5744
|
||||
4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, // 5760
|
||||
4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, // 5776
|
||||
3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, // 5792
|
||||
6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, // 5808
|
||||
4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, // 5824
|
||||
2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, // 5840
|
||||
6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, // 5856
|
||||
6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, // 5872
|
||||
4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, // 5888
|
||||
2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, // 5904
|
||||
4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, // 5920
|
||||
2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, // 5936
|
||||
4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, // 5952
|
||||
4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, // 5968
|
||||
4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, // 5984
|
||||
6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, // 6000
|
||||
3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, // 6016
|
||||
6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, // 6032
|
||||
3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, // 6048
|
||||
6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, // 6064
|
||||
2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, // 6080
|
||||
3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, // 6096
|
||||
7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, // 6112
|
||||
2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, // 6128
|
||||
3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, // 6144
|
||||
3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, // 6160
|
||||
3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, // 6176
|
||||
3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, // 6192
|
||||
7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, // 6208
|
||||
7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, // 6224
|
||||
7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, // 6240
|
||||
7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, // 6256
|
||||
7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, // 6272
|
||||
4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, // 6288
|
||||
3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, // 6304
|
||||
3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, // 6320
|
||||
4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, // 6336
|
||||
3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, // 6352
|
||||
3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, // 6368
|
||||
7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, // 6384
|
||||
4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, // 6400
|
||||
7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, // 6416
|
||||
7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, // 6432
|
||||
7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, // 6448
|
||||
7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, // 6464
|
||||
7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, // 6480
|
||||
4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, // 6496
|
||||
4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, // 6512
|
||||
7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, // 6528
|
||||
3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, // 6544
|
||||
4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, // 6560
|
||||
7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, // 6576
|
||||
7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, // 6592
|
||||
4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, // 6608
|
||||
3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, // 6624
|
||||
3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, // 6640
|
||||
7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, // 6656
|
||||
4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, // 6672
|
||||
4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, // 6688
|
||||
4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, // 6704
|
||||
4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, // 6720
|
||||
4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, // 6736
|
||||
4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, // 6752
|
||||
7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, // 6768
|
||||
7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, // 6784
|
||||
7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, // 6800
|
||||
7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, // 6816
|
||||
7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, // 6832
|
||||
2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, // 6848
|
||||
3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, // 6864
|
||||
7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, // 6880
|
||||
7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, // 6896
|
||||
3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, // 6912
|
||||
4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, // 6928
|
||||
3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, // 6944
|
||||
3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, // 6960
|
||||
2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, // 6976
|
||||
7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, // 6992
|
||||
7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, // 7008
|
||||
4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, // 7024
|
||||
3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, // 7040
|
||||
3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, // 7056
|
||||
7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, // 7072
|
||||
7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, // 7088
|
||||
7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, // 7104
|
||||
4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, // 7120
|
||||
7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, // 7136
|
||||
2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, // 7152
|
||||
3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, // 7168
|
||||
4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, // 7184
|
||||
7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, // 7200
|
||||
4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, // 7216
|
||||
4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, // 7232
|
||||
7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, // 7248
|
||||
7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, // 7264
|
||||
5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, // 7280
|
||||
7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, // 7296
|
||||
7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, // 7312
|
||||
7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, // 7328
|
||||
7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, // 7344
|
||||
7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, // 7360
|
||||
5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, // 7376
|
||||
5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, // 7392
|
||||
7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, // 7408
|
||||
3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, // 7424
|
||||
7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, // 7440
|
||||
7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, // 7456
|
||||
3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, // 7472
|
||||
7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, // 7488
|
||||
7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, // 7504
|
||||
1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, // 7520
|
||||
3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, // 7536
|
||||
4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, // 7552
|
||||
2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, // 7568
|
||||
3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, // 7584
|
||||
2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, // 7600
|
||||
5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, // 7616
|
||||
4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, // 7632
|
||||
4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, // 7648
|
||||
5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, // 7664
|
||||
7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, // 7680
|
||||
7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, // 7696
|
||||
7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, // 7712
|
||||
7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, // 7728
|
||||
3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, // 7744
|
||||
7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, // 7760
|
||||
3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, // 7776
|
||||
7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, // 7792
|
||||
4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, // 7808
|
||||
7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, // 7824
|
||||
7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, // 7840
|
||||
7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, // 7856
|
||||
7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, // 7872
|
||||
7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, // 7888
|
||||
7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, // 7904
|
||||
7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, // 7920
|
||||
7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, // 7936
|
||||
7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, // 7952
|
||||
7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, // 7968
|
||||
7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, // 7984
|
||||
7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, // 8000
|
||||
8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, // 8016
|
||||
8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, // 8032
|
||||
8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, // 8048
|
||||
8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, // 8064
|
||||
8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, // 8080
|
||||
8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, // 8096
|
||||
8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, // 8112
|
||||
8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, // 8128
|
||||
8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, // 8144
|
||||
8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, // 8160
|
||||
8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, // 8176
|
||||
8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, // 8192
|
||||
8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, // 8208
|
||||
8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, // 8224
|
||||
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, // 8240
|
||||
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, // 8256
|
||||
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, // 8272
|
||||
****************************************************************************************/
|
||||
|
||||
};
|
||||
|
|
@ -1,600 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nscore.h"
|
||||
#include "JpCntx.h"
|
||||
|
||||
// This is hiragana 2-char sequence table, the number in each cell represents
|
||||
// its frequency category
|
||||
const uint8_t jp2CharContext[83][83] = {
|
||||
{
|
||||
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
},
|
||||
{
|
||||
2, 4, 0, 4, 0, 3, 0, 4, 0, 3, 4, 4, 4, 2, 4, 3, 3, 4, 3, 2, 3,
|
||||
3, 4, 2, 3, 3, 3, 2, 4, 1, 4, 3, 3, 1, 5, 4, 3, 4, 3, 4, 3, 5,
|
||||
3, 0, 3, 5, 4, 2, 0, 3, 1, 0, 3, 3, 0, 3, 3, 0, 1, 1, 0, 4, 3,
|
||||
0, 3, 3, 0, 4, 0, 2, 0, 3, 5, 5, 5, 5, 4, 0, 4, 1, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 4, 4, 3, 5, 3, 5, 1, 5, 3, 4,
|
||||
3, 4, 4, 3, 4, 3, 3, 4, 3, 5, 4, 4, 3, 5, 5, 3, 5, 5, 5, 3, 5,
|
||||
5, 3, 4, 5, 5, 3, 1, 3, 2, 0, 3, 4, 0, 4, 2, 0, 4, 2, 1, 5, 3,
|
||||
2, 3, 5, 0, 4, 0, 2, 0, 5, 4, 4, 5, 4, 5, 0, 4, 0, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 5, 4, 3, 3, 3, 3, 4, 3, 5, 4, 4,
|
||||
3, 5, 4, 4, 3, 4, 3, 4, 4, 4, 4, 5, 3, 4, 4, 3, 4, 5, 5, 4, 5,
|
||||
5, 1, 4, 5, 4, 3, 0, 3, 3, 1, 3, 3, 0, 4, 4, 0, 3, 3, 1, 5, 3,
|
||||
3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 0, 4, 1, 1, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 3, 0, 3, 0, 4, 0, 3, 4, 4, 3, 2, 2, 1, 2, 1, 3, 1, 3,
|
||||
3, 3, 3, 3, 4, 3, 1, 3, 3, 5, 3, 3, 0, 4, 3, 0, 5, 4, 3, 3, 5,
|
||||
4, 4, 3, 4, 4, 5, 0, 1, 2, 0, 1, 2, 0, 2, 2, 0, 1, 0, 0, 5, 2,
|
||||
2, 1, 4, 0, 3, 0, 1, 0, 4, 4, 3, 5, 4, 3, 0, 2, 1, 0, 4, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 5, 0, 4, 0, 2, 1, 4, 4, 2, 4, 1, 4, 2, 4, 2, 4, 3, 3,
|
||||
3, 4, 3, 3, 3, 3, 1, 4, 2, 3, 3, 3, 1, 4, 4, 1, 1, 1, 4, 3, 3,
|
||||
2, 0, 2, 4, 3, 2, 0, 3, 3, 0, 3, 1, 1, 0, 0, 0, 3, 3, 0, 4, 2,
|
||||
2, 3, 4, 0, 4, 0, 3, 0, 4, 4, 5, 3, 4, 4, 0, 3, 0, 0, 1, 4,
|
||||
},
|
||||
{
|
||||
1, 4, 0, 4, 0, 4, 0, 4, 0, 3, 5, 4, 4, 3, 4, 3, 5, 4, 3, 3, 4,
|
||||
3, 5, 4, 4, 4, 4, 3, 4, 2, 4, 3, 3, 1, 5, 4, 3, 2, 4, 5, 4, 5,
|
||||
5, 4, 4, 5, 4, 4, 0, 3, 2, 2, 3, 3, 0, 4, 3, 1, 3, 2, 1, 4, 3,
|
||||
3, 4, 5, 0, 3, 0, 2, 0, 4, 5, 5, 4, 5, 4, 0, 4, 0, 0, 5, 4,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 3, 4, 0, 4, 4, 4,
|
||||
3, 4, 3, 4, 3, 3, 1, 4, 2, 4, 3, 4, 0, 5, 4, 1, 4, 5, 4, 4, 5,
|
||||
3, 2, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 2, 3, 2, 0, 4, 3, 3, 4, 3,
|
||||
3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 4, 3, 0, 4, 1, 0, 1, 3,
|
||||
},
|
||||
{
|
||||
0, 3, 1, 4, 0, 3, 0, 2, 0, 3, 4, 4, 3, 1, 4, 2, 3, 3, 4, 3, 4,
|
||||
3, 4, 3, 4, 4, 3, 2, 3, 1, 5, 4, 4, 1, 4, 4, 3, 5, 4, 4, 3, 5,
|
||||
5, 4, 3, 4, 4, 3, 1, 2, 3, 1, 2, 2, 0, 3, 2, 0, 3, 1, 0, 5, 3,
|
||||
3, 3, 4, 3, 3, 3, 3, 4, 4, 4, 4, 5, 4, 2, 0, 3, 3, 2, 4, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 1, 0, 1, 0, 0, 3, 2, 0, 0, 2, 0, 1, 0, 2, 1, 3,
|
||||
3, 3, 1, 2, 3, 1, 0, 1, 0, 4, 2, 1, 1, 3, 3, 0, 4, 3, 3, 1, 4,
|
||||
3, 3, 0, 3, 3, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 4, 1,
|
||||
0, 2, 3, 2, 2, 2, 1, 3, 3, 3, 4, 4, 3, 2, 0, 3, 1, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 3, 0, 3, 0, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 3, 4,
|
||||
2, 4, 3, 4, 3, 3, 2, 4, 3, 4, 5, 4, 1, 4, 5, 3, 5, 4, 5, 3, 5,
|
||||
4, 0, 3, 5, 5, 3, 1, 3, 3, 2, 2, 3, 0, 3, 4, 1, 3, 3, 2, 4, 3,
|
||||
3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 5, 3, 0, 4, 1, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 2, 2, 1, 0, 1, 0, 0, 0, 3, 0, 3,
|
||||
0, 3, 0, 1, 3, 1, 0, 3, 1, 3, 3, 3, 1, 3, 3, 3, 0, 1, 3, 1, 3,
|
||||
4, 0, 0, 3, 1, 1, 0, 3, 2, 0, 0, 0, 0, 1, 3, 0, 1, 0, 0, 3, 3,
|
||||
2, 0, 3, 0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 3, 0, 3, 0, 0, 2, 3,
|
||||
},
|
||||
{
|
||||
2, 3, 0, 3, 0, 2, 0, 1, 0, 3, 3, 4, 3, 1, 3, 1, 1, 1, 3, 1, 4,
|
||||
3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 4, 3, 1, 4, 3, 2, 5, 5, 4, 4, 4,
|
||||
4, 3, 3, 4, 4, 4, 0, 2, 1, 1, 3, 2, 0, 1, 2, 0, 0, 1, 0, 4, 1,
|
||||
3, 3, 3, 0, 3, 0, 1, 0, 4, 4, 4, 5, 5, 3, 0, 2, 0, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 1, 0, 3, 1, 3, 0, 2, 3, 3, 3, 0, 3, 1, 0, 0, 3, 0, 3,
|
||||
2, 3, 1, 3, 2, 1, 1, 0, 0, 4, 2, 1, 0, 2, 3, 1, 4, 3, 2, 0, 4,
|
||||
4, 3, 1, 3, 1, 3, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1,
|
||||
1, 1, 2, 0, 3, 0, 0, 0, 3, 4, 2, 4, 3, 2, 0, 1, 0, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 4, 0, 5, 0, 4, 0, 2, 4, 4, 2, 3, 3, 2, 3, 3, 5, 3, 3,
|
||||
3, 4, 3, 4, 2, 3, 0, 4, 3, 3, 3, 4, 1, 4, 3, 2, 1, 5, 5, 3, 4,
|
||||
5, 1, 3, 5, 4, 2, 0, 3, 3, 0, 1, 3, 0, 4, 2, 0, 1, 3, 1, 4, 3,
|
||||
3, 3, 3, 0, 3, 0, 1, 0, 3, 4, 4, 4, 5, 5, 0, 3, 0, 1, 4, 5,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 3, 1, 3, 0, 4, 0, 1, 1, 3, 0, 3,
|
||||
4, 3, 2, 3, 1, 0, 3, 3, 2, 3, 1, 3, 0, 2, 3, 0, 2, 1, 4, 1, 2,
|
||||
2, 0, 0, 3, 3, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 3, 2,
|
||||
1, 3, 3, 0, 2, 0, 2, 0, 0, 3, 3, 1, 2, 4, 0, 3, 0, 2, 2, 3,
|
||||
},
|
||||
{
|
||||
2, 4, 0, 5, 0, 4, 0, 4, 0, 2, 4, 4, 4, 3, 4, 3, 3, 3, 1, 2, 4,
|
||||
3, 4, 3, 4, 4, 5, 0, 3, 3, 3, 3, 2, 0, 4, 3, 1, 4, 3, 4, 1, 4,
|
||||
4, 3, 3, 4, 4, 3, 1, 2, 3, 0, 4, 2, 0, 4, 1, 0, 3, 3, 0, 4, 3,
|
||||
3, 3, 4, 0, 4, 0, 2, 0, 3, 5, 3, 4, 5, 2, 0, 3, 0, 0, 4, 5,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 4, 0, 1, 0, 1, 0, 1, 3, 2, 2, 1, 3, 0, 3, 0, 2, 0, 2,
|
||||
0, 3, 0, 2, 0, 0, 0, 1, 0, 1, 1, 0, 0, 3, 1, 0, 0, 0, 4, 0, 3,
|
||||
1, 0, 2, 1, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 2,
|
||||
2, 3, 1, 0, 3, 0, 0, 0, 1, 4, 4, 4, 3, 0, 0, 4, 0, 0, 1, 4,
|
||||
},
|
||||
{
|
||||
1, 4, 1, 5, 0, 3, 0, 3, 0, 4, 5, 4, 4, 3, 5, 3, 3, 4, 4, 3, 4,
|
||||
1, 3, 3, 3, 3, 2, 1, 4, 1, 5, 4, 3, 1, 4, 4, 3, 5, 4, 4, 3, 5,
|
||||
4, 3, 3, 4, 4, 4, 0, 3, 3, 1, 2, 3, 0, 3, 1, 0, 3, 3, 0, 5, 4,
|
||||
4, 4, 4, 4, 4, 3, 3, 5, 4, 4, 3, 3, 5, 4, 0, 3, 2, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 1, 0, 0, 0, 1, 3, 3, 3, 2, 4, 1, 3, 0, 3, 1, 3,
|
||||
0, 2, 2, 1, 1, 0, 0, 2, 0, 4, 3, 1, 0, 4, 3, 0, 4, 4, 4, 1, 4,
|
||||
3, 1, 1, 3, 3, 1, 0, 2, 0, 0, 1, 3, 0, 0, 0, 0, 2, 0, 0, 4, 3,
|
||||
2, 4, 3, 5, 4, 3, 3, 3, 4, 3, 3, 4, 3, 3, 0, 2, 1, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 4, 0, 3, 0, 2, 0, 2, 5, 5, 3, 4, 4, 4, 4, 1, 4, 3, 3,
|
||||
0, 4, 3, 4, 3, 1, 3, 3, 2, 4, 3, 0, 3, 4, 3, 0, 3, 4, 4, 2, 4,
|
||||
4, 0, 4, 5, 3, 3, 2, 2, 1, 1, 1, 2, 0, 1, 5, 0, 3, 3, 2, 4, 3,
|
||||
3, 3, 4, 0, 3, 0, 2, 0, 4, 4, 3, 5, 5, 0, 0, 3, 0, 2, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 4, 0, 3, 0, 1, 0, 3, 4, 3, 3, 1, 3, 3, 3, 0, 3, 1, 3,
|
||||
0, 4, 3, 3, 1, 1, 0, 3, 0, 3, 3, 0, 0, 4, 4, 0, 1, 5, 4, 3, 3,
|
||||
5, 0, 3, 3, 4, 3, 0, 2, 0, 1, 1, 1, 0, 1, 3, 0, 1, 2, 1, 3, 3,
|
||||
2, 3, 3, 0, 3, 0, 1, 0, 1, 3, 3, 4, 4, 1, 0, 1, 2, 2, 1, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 4, 0, 4, 0, 3, 0, 1, 3, 3, 3, 2, 3, 1, 1, 0, 3, 0, 3,
|
||||
3, 4, 3, 2, 4, 2, 0, 1, 0, 4, 3, 2, 0, 4, 3, 0, 5, 3, 3, 2, 4,
|
||||
4, 4, 3, 3, 3, 4, 0, 1, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 4, 2,
|
||||
3, 3, 3, 0, 3, 0, 0, 0, 4, 4, 4, 5, 3, 2, 0, 3, 3, 0, 3, 5,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 0, 0, 3, 0, 1, 3, 0, 2, 0, 0, 0, 1, 0, 3, 1, 1,
|
||||
3, 3, 0, 0, 3, 0, 0, 3, 0, 2, 3, 1, 0, 3, 1, 0, 3, 3, 2, 0, 4,
|
||||
2, 2, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1,
|
||||
2, 0, 1, 0, 1, 0, 0, 0, 1, 3, 1, 2, 0, 0, 0, 1, 0, 0, 1, 4,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 5, 0, 1, 0, 2, 4, 3, 1, 3, 3, 2, 1, 1, 5, 2, 1,
|
||||
0, 5, 1, 2, 0, 0, 0, 3, 3, 2, 2, 3, 2, 4, 3, 0, 0, 3, 3, 1, 3,
|
||||
3, 0, 2, 5, 3, 4, 0, 3, 3, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0, 2, 2,
|
||||
3, 3, 3, 0, 2, 0, 1, 0, 3, 4, 4, 2, 5, 4, 0, 3, 0, 0, 3, 5,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 3, 0, 1, 0, 3, 3, 3, 3, 0, 3, 0, 2, 0, 2, 1, 1,
|
||||
0, 2, 0, 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 3, 2, 0, 0, 3, 3, 1, 2,
|
||||
3, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 3,
|
||||
1, 2, 3, 0, 3, 0, 1, 0, 3, 2, 1, 0, 4, 3, 0, 1, 1, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 5, 0, 3, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 4, 3, 5, 3, 3,
|
||||
2, 5, 3, 4, 4, 4, 3, 4, 3, 4, 5, 5, 3, 4, 4, 3, 4, 4, 5, 4, 4,
|
||||
4, 3, 4, 5, 5, 4, 2, 3, 4, 2, 3, 4, 0, 3, 3, 1, 4, 3, 2, 4, 3,
|
||||
3, 5, 5, 0, 3, 0, 3, 0, 5, 5, 5, 5, 4, 4, 0, 4, 0, 1, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 5, 4, 4, 2, 3, 2, 5, 1, 3, 2, 5,
|
||||
1, 4, 2, 3, 2, 3, 3, 4, 3, 3, 3, 3, 2, 5, 4, 1, 3, 3, 5, 3, 4,
|
||||
4, 0, 4, 4, 3, 1, 1, 3, 1, 0, 2, 3, 0, 2, 3, 0, 3, 0, 0, 4, 3,
|
||||
1, 3, 4, 0, 3, 0, 2, 0, 4, 4, 4, 3, 4, 5, 0, 4, 0, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 3, 1, 2, 0, 3, 4, 4, 3, 3, 3, 0, 2, 2, 4, 3, 3,
|
||||
1, 3, 3, 3, 1, 1, 0, 3, 1, 4, 3, 2, 3, 4, 4, 2, 4, 4, 4, 3, 4,
|
||||
4, 3, 2, 4, 4, 3, 1, 3, 3, 1, 3, 3, 0, 4, 1, 0, 2, 2, 1, 4, 3,
|
||||
2, 3, 3, 5, 4, 3, 3, 5, 4, 4, 3, 3, 0, 4, 0, 3, 2, 2, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 2, 1, 3, 0, 0, 0, 0, 0, 2, 0, 1,
|
||||
2, 1, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 1, 3, 1, 0, 0, 0,
|
||||
1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||
2, 2, 0, 3, 4, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 4, 1, 4, 0, 3, 0, 4, 0, 3,
|
||||
0, 4, 0, 3, 0, 3, 0, 4, 1, 5, 1, 4, 0, 0, 3, 0, 5, 0, 5, 2, 0,
|
||||
1, 0, 0, 0, 2, 1, 4, 0, 1, 3, 0, 0, 3, 0, 0, 3, 1, 1, 4, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
},
|
||||
{
|
||||
1, 4, 0, 5, 0, 3, 0, 2, 0, 3, 5, 4, 4, 3, 4, 3, 5, 3, 4, 3, 3,
|
||||
0, 4, 3, 3, 3, 3, 3, 3, 2, 4, 4, 3, 1, 3, 4, 4, 5, 4, 4, 3, 4,
|
||||
4, 1, 3, 5, 4, 3, 3, 3, 1, 2, 2, 3, 3, 1, 3, 1, 3, 3, 3, 5, 3,
|
||||
3, 4, 5, 0, 3, 0, 3, 0, 3, 4, 3, 4, 4, 3, 0, 3, 0, 2, 4, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 4, 0, 0, 0, 0, 0, 1, 4, 0, 4, 1, 4, 2, 4, 0, 3, 0, 1,
|
||||
0, 1, 0, 0, 0, 0, 0, 2, 0, 3, 1, 1, 1, 0, 3, 0, 0, 0, 1, 2, 1,
|
||||
0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 3, 2,
|
||||
0, 2, 2, 0, 1, 0, 0, 0, 2, 3, 2, 3, 3, 0, 0, 0, 0, 2, 1, 0,
|
||||
},
|
||||
{
|
||||
0, 5, 1, 5, 0, 3, 0, 3, 0, 5, 4, 4, 5, 1, 5, 3, 3, 0, 4, 3, 4,
|
||||
3, 5, 3, 4, 3, 3, 2, 4, 3, 4, 3, 3, 0, 3, 3, 1, 4, 4, 3, 4, 4,
|
||||
4, 3, 4, 5, 5, 3, 2, 3, 1, 1, 3, 3, 1, 3, 1, 1, 3, 3, 2, 4, 5,
|
||||
3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 5, 3, 3, 0, 3, 4, 0, 4, 3,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 5, 0, 3, 0, 2, 0, 4, 4, 3, 5, 2, 4, 3, 3, 3, 4, 4, 4,
|
||||
3, 5, 3, 5, 3, 3, 1, 4, 0, 4, 3, 3, 0, 3, 3, 0, 4, 4, 4, 4, 5,
|
||||
4, 3, 3, 5, 5, 3, 2, 3, 1, 2, 3, 2, 0, 1, 0, 0, 3, 2, 2, 4, 4,
|
||||
3, 1, 5, 0, 4, 0, 3, 0, 4, 3, 1, 3, 2, 1, 0, 3, 3, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 5, 5, 3, 4, 3, 3, 2, 5, 4, 4,
|
||||
3, 5, 3, 5, 3, 4, 0, 4, 3, 4, 4, 3, 2, 4, 4, 3, 4, 5, 4, 4, 5,
|
||||
5, 0, 3, 5, 5, 4, 1, 3, 3, 2, 3, 3, 1, 3, 1, 0, 4, 3, 1, 4, 4,
|
||||
3, 4, 5, 0, 4, 0, 2, 0, 4, 3, 4, 4, 3, 3, 0, 4, 0, 0, 5, 5,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 5, 0, 1, 1, 3, 3, 4, 4, 3, 4, 1, 3, 0, 5, 1, 3,
|
||||
0, 3, 1, 3, 1, 1, 0, 3, 0, 3, 3, 4, 0, 4, 3, 0, 4, 4, 4, 3, 4,
|
||||
4, 0, 3, 5, 4, 1, 0, 3, 0, 0, 2, 3, 0, 3, 1, 0, 3, 1, 0, 3, 2,
|
||||
1, 3, 5, 0, 3, 0, 1, 0, 3, 2, 3, 3, 4, 4, 0, 2, 2, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
2, 4, 0, 5, 0, 4, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 5, 3, 5, 3, 5,
|
||||
2, 5, 3, 4, 3, 3, 4, 3, 4, 5, 3, 2, 1, 5, 4, 3, 2, 3, 4, 5, 3,
|
||||
4, 1, 2, 5, 4, 3, 0, 3, 3, 0, 3, 2, 0, 2, 3, 0, 4, 1, 0, 3, 4,
|
||||
3, 3, 5, 0, 3, 0, 1, 0, 4, 5, 5, 5, 4, 3, 0, 4, 2, 0, 3, 5,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 4, 0, 4, 0, 2, 0, 5, 4, 3, 4, 3, 4, 3, 3, 3, 4, 3, 4,
|
||||
2, 5, 3, 5, 3, 4, 1, 4, 3, 4, 4, 4, 0, 3, 5, 0, 4, 4, 4, 4, 5,
|
||||
3, 1, 3, 4, 5, 3, 3, 3, 3, 3, 3, 3, 0, 2, 2, 0, 3, 3, 2, 4, 3,
|
||||
3, 3, 5, 3, 4, 1, 3, 3, 5, 3, 2, 0, 0, 0, 0, 4, 3, 1, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 3, 0, 3, 0, 1, 0, 1, 3, 3, 3, 2, 3, 3, 3, 0, 3, 0, 0,
|
||||
0, 3, 1, 3, 0, 0, 0, 2, 2, 2, 3, 0, 0, 3, 2, 0, 1, 2, 4, 1, 3,
|
||||
3, 0, 0, 3, 3, 3, 0, 1, 0, 0, 2, 1, 0, 0, 3, 0, 3, 1, 0, 3, 0,
|
||||
0, 1, 3, 0, 2, 0, 1, 0, 3, 3, 1, 3, 3, 0, 0, 1, 1, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 2, 1, 4, 0, 2, 2, 3, 1, 1, 3, 1, 1, 0, 2, 0, 3,
|
||||
1, 2, 3, 1, 3, 0, 0, 1, 0, 4, 3, 2, 3, 3, 3, 1, 4, 2, 3, 3, 3,
|
||||
3, 1, 0, 3, 1, 4, 0, 1, 1, 0, 1, 2, 0, 1, 1, 0, 1, 1, 0, 3, 1,
|
||||
3, 2, 2, 0, 1, 0, 0, 0, 2, 3, 3, 3, 1, 0, 0, 0, 0, 0, 2, 3,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 5, 5, 3, 3, 4, 3, 3, 1, 5, 4, 4,
|
||||
2, 4, 4, 4, 3, 4, 2, 4, 3, 5, 5, 4, 3, 3, 4, 3, 3, 5, 5, 4, 5,
|
||||
5, 1, 3, 4, 5, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 1, 4, 5,
|
||||
3, 3, 5, 0, 4, 0, 3, 0, 5, 3, 3, 1, 4, 3, 0, 4, 0, 1, 5, 3,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 5, 0, 4, 0, 2, 0, 4, 4, 3, 4, 3, 3, 3, 3, 3, 5, 4, 4,
|
||||
4, 4, 4, 4, 5, 3, 3, 5, 2, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4, 5, 5,
|
||||
3, 3, 4, 3, 4, 3, 3, 4, 3, 3, 3, 3, 1, 2, 2, 1, 4, 3, 3, 5, 4,
|
||||
4, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 4, 4, 1, 0, 4, 2, 0, 2, 4,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 3, 0, 1, 0, 3, 5, 2, 3, 0, 3, 0, 2, 1, 4, 2, 3,
|
||||
3, 4, 1, 4, 3, 3, 2, 4, 1, 3, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 5,
|
||||
3, 3, 3, 3, 3, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 0, 0, 3, 1,
|
||||
2, 2, 3, 0, 3, 0, 2, 0, 4, 4, 3, 3, 4, 1, 0, 3, 0, 0, 2, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1,
|
||||
0, 2, 0, 1, 0, 0, 0, 0, 0, 3, 1, 3, 0, 3, 2, 0, 0, 0, 1, 0, 3,
|
||||
2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 2, 0, 0, 0, 0, 0, 0, 2,
|
||||
},
|
||||
{
|
||||
0, 2, 1, 3, 0, 2, 0, 2, 0, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 3,
|
||||
3, 4, 2, 2, 1, 2, 1, 4, 0, 4, 3, 1, 3, 3, 3, 2, 4, 3, 5, 4, 3,
|
||||
3, 3, 3, 3, 3, 3, 0, 1, 3, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 4, 2,
|
||||
0, 2, 3, 0, 3, 3, 0, 3, 3, 4, 2, 3, 1, 4, 0, 1, 2, 0, 2, 3,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 1, 0, 3, 0, 2, 3, 3, 3, 0, 3, 1, 2, 0, 3, 3, 2,
|
||||
3, 3, 2, 3, 2, 3, 1, 3, 0, 4, 3, 2, 0, 3, 3, 1, 4, 3, 3, 2, 3,
|
||||
4, 3, 1, 3, 3, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 4, 1,
|
||||
1, 0, 3, 0, 3, 1, 0, 2, 3, 3, 3, 3, 3, 1, 0, 0, 2, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0,
|
||||
0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 1, 0, 1, 0, 1, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 3, 0, 2, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 1, 3, 0, 3, 0, 2, 3, 3, 3, 1, 3, 1, 3, 1, 3, 1, 3,
|
||||
3, 3, 1, 3, 0, 2, 3, 1, 1, 4, 3, 3, 2, 3, 3, 1, 2, 2, 4, 1, 3,
|
||||
3, 0, 1, 4, 2, 3, 0, 1, 3, 0, 3, 0, 0, 1, 3, 0, 2, 0, 0, 3, 3,
|
||||
2, 1, 3, 0, 3, 0, 2, 0, 3, 4, 4, 4, 3, 1, 0, 3, 0, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 1, 0, 2, 0, 0, 0, 1, 3, 2, 2, 1, 3, 0, 1, 1, 3, 0, 3,
|
||||
2, 3, 1, 2, 0, 2, 0, 1, 1, 3, 3, 3, 0, 3, 3, 1, 1, 2, 3, 2, 3,
|
||||
3, 1, 2, 3, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 2, 1,
|
||||
2, 1, 3, 0, 3, 0, 0, 0, 3, 4, 4, 4, 3, 2, 0, 2, 0, 0, 2, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 3,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 2, 0, 3, 0, 3, 3, 3, 2, 3, 2, 2, 2, 0, 3, 1, 3,
|
||||
3, 3, 2, 3, 3, 0, 0, 3, 0, 3, 2, 2, 0, 2, 3, 1, 4, 3, 4, 3, 3,
|
||||
2, 3, 1, 5, 4, 4, 0, 3, 1, 2, 1, 3, 0, 3, 1, 1, 2, 0, 2, 3, 1,
|
||||
3, 1, 3, 0, 3, 0, 1, 0, 3, 3, 4, 4, 2, 1, 0, 2, 1, 0, 2, 4,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 4, 2, 5, 1, 4, 0, 2, 0, 2, 1, 3,
|
||||
1, 4, 0, 2, 1, 0, 0, 2, 1, 4, 1, 1, 0, 3, 3, 0, 5, 1, 3, 2, 3,
|
||||
3, 1, 0, 3, 2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0,
|
||||
1, 0, 3, 0, 2, 0, 1, 0, 3, 3, 3, 4, 3, 3, 0, 0, 0, 0, 2, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 0, 3, 0, 4, 0, 3, 0, 2, 4, 3, 1, 0, 3, 2, 2, 1, 3, 1, 2,
|
||||
2, 3, 1, 1, 1, 2, 1, 3, 0, 1, 2, 0, 1, 3, 2, 1, 3, 0, 5, 5, 1,
|
||||
0, 0, 1, 3, 2, 1, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 3, 4, 0, 1, 1,
|
||||
1, 3, 2, 0, 2, 0, 1, 0, 2, 3, 3, 1, 2, 3, 0, 1, 0, 1, 0, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 1, 0, 3, 0, 3, 0, 2, 2, 1, 0, 0, 4, 0, 3, 0, 3, 1, 3,
|
||||
0, 3, 0, 3, 0, 1, 0, 3, 0, 3, 1, 3, 0, 3, 3, 0, 0, 1, 2, 1, 1,
|
||||
1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
|
||||
1, 2, 0, 0, 2, 0, 0, 0, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 3, 0, 1, 0, 2, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 2, 0, 2, 3, 0, 0, 2, 2, 3, 1,
|
||||
2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 2, 0, 0, 0, 0, 2, 3,
|
||||
},
|
||||
{
|
||||
2, 4, 0, 5, 0, 5, 0, 4, 0, 3, 4, 3, 3, 3, 4, 3, 3, 3, 4, 3, 4,
|
||||
4, 5, 4, 5, 5, 5, 2, 3, 0, 5, 5, 4, 1, 5, 4, 3, 1, 5, 4, 3, 4,
|
||||
4, 3, 3, 4, 3, 3, 0, 3, 2, 0, 2, 3, 0, 3, 0, 0, 3, 3, 0, 5, 3,
|
||||
2, 3, 3, 0, 3, 0, 3, 0, 3, 4, 5, 4, 5, 3, 0, 4, 3, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 3, 4, 3, 2, 3, 2, 3, 0, 4, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 0, 3, 2, 4, 3, 3, 1, 3, 4, 3, 4, 4, 4, 3, 4,
|
||||
4, 3, 2, 4, 4, 1, 0, 2, 0, 0, 1, 1, 0, 2, 0, 0, 3, 1, 0, 5, 3,
|
||||
2, 1, 3, 0, 3, 0, 1, 2, 4, 3, 2, 4, 3, 3, 0, 3, 2, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 1, 0, 0, 0, 1, 4, 3, 3, 2, 3, 1, 3, 1, 4, 2, 3,
|
||||
2, 4, 2, 3, 4, 3, 0, 2, 2, 3, 3, 3, 0, 3, 3, 3, 0, 3, 4, 1, 3,
|
||||
3, 0, 3, 4, 3, 3, 0, 1, 1, 0, 1, 0, 0, 0, 4, 0, 3, 0, 0, 3, 1,
|
||||
2, 1, 3, 0, 4, 0, 1, 0, 4, 3, 3, 4, 3, 3, 0, 2, 0, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 4, 0, 1, 0, 3, 0, 3, 4, 3, 3, 0, 3, 3, 3, 1, 3, 1, 3,
|
||||
3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 3, 3, 1, 3, 3, 2, 5, 4, 3, 3, 4,
|
||||
5, 3, 2, 5, 3, 4, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 4, 2,
|
||||
2, 1, 3, 0, 3, 0, 2, 0, 4, 4, 3, 5, 3, 2, 0, 1, 1, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 4, 3, 3, 2, 3, 3, 3, 1, 4, 3, 4,
|
||||
1, 5, 3, 4, 3, 4, 0, 4, 2, 4, 3, 4, 1, 5, 4, 0, 4, 4, 4, 4, 5,
|
||||
4, 1, 3, 5, 4, 2, 1, 4, 1, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 4, 3,
|
||||
3, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 3, 3, 3, 0, 4, 2, 0, 3, 4,
|
||||
},
|
||||
{
|
||||
1, 4, 0, 4, 0, 3, 0, 1, 0, 3, 3, 3, 1, 1, 3, 3, 2, 2, 3, 3, 1,
|
||||
0, 3, 2, 2, 1, 2, 0, 3, 1, 2, 1, 2, 0, 3, 2, 0, 2, 2, 3, 3, 4,
|
||||
3, 0, 3, 3, 1, 2, 0, 1, 1, 3, 1, 2, 0, 0, 3, 0, 1, 1, 0, 3, 2,
|
||||
2, 3, 3, 0, 3, 0, 0, 0, 2, 3, 3, 4, 3, 3, 0, 1, 0, 0, 1, 4,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 4, 0, 0, 0, 3, 4, 4, 3, 1, 4, 2, 3, 2, 3, 3, 3,
|
||||
1, 4, 3, 4, 0, 3, 0, 4, 2, 3, 3, 2, 2, 5, 4, 2, 1, 3, 4, 3, 4,
|
||||
3, 1, 3, 3, 4, 2, 0, 2, 1, 0, 3, 3, 0, 0, 2, 0, 3, 1, 0, 4, 4,
|
||||
3, 4, 3, 0, 4, 0, 1, 0, 2, 4, 4, 4, 4, 4, 0, 3, 2, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 0, 0, 1, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 3, 0, 4, 0, 4, 0, 1, 3, 3, 3, 0, 4, 0, 2, 1, 2, 1, 1,
|
||||
1, 2, 0, 3, 1, 1, 0, 1, 0, 3, 1, 0, 0, 3, 3, 2, 0, 1, 1, 0, 0,
|
||||
0, 0, 0, 1, 0, 2, 0, 2, 2, 0, 3, 1, 0, 0, 1, 0, 1, 1, 0, 1, 2,
|
||||
0, 3, 0, 0, 0, 0, 1, 0, 0, 3, 3, 4, 3, 1, 0, 1, 0, 3, 0, 2,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 3, 0, 5, 0, 0, 0, 0, 1, 0, 2, 0, 3, 1, 0, 1, 3, 0, 0,
|
||||
0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 4, 0, 0, 0, 2, 3, 0, 1,
|
||||
4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0,
|
||||
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 3,
|
||||
},
|
||||
{
|
||||
0, 2, 0, 5, 0, 5, 0, 1, 0, 2, 4, 3, 3, 2, 5, 1, 3, 2, 3, 3, 3,
|
||||
0, 4, 1, 2, 0, 3, 0, 4, 0, 2, 2, 1, 1, 5, 3, 0, 0, 1, 4, 2, 3,
|
||||
2, 0, 3, 3, 3, 2, 0, 2, 4, 1, 1, 2, 0, 1, 1, 0, 3, 1, 0, 1, 3,
|
||||
1, 2, 3, 0, 2, 0, 0, 0, 1, 3, 5, 4, 4, 4, 0, 3, 0, 0, 1, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 5, 0, 4, 0, 4, 0, 4, 5, 4, 3, 3, 4, 3, 3, 3, 4, 3, 4,
|
||||
4, 5, 3, 4, 5, 4, 2, 4, 2, 3, 4, 3, 1, 4, 4, 1, 3, 5, 4, 4, 5,
|
||||
5, 4, 4, 5, 5, 5, 2, 3, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3,
|
||||
4, 4, 4, 0, 3, 0, 4, 0, 3, 3, 4, 4, 5, 0, 0, 4, 3, 0, 4, 5,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 4, 4, 4, 3, 3, 2, 4, 3, 4, 3, 4,
|
||||
3, 5, 3, 4, 3, 2, 1, 4, 2, 4, 4, 3, 1, 3, 4, 2, 4, 5, 5, 3, 4,
|
||||
5, 4, 1, 5, 4, 3, 0, 3, 2, 2, 3, 2, 1, 3, 1, 0, 3, 3, 3, 5, 3,
|
||||
3, 3, 5, 4, 4, 2, 3, 3, 4, 3, 3, 3, 2, 1, 0, 3, 2, 1, 4, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 5, 0, 4, 0, 3, 0, 3, 5, 5, 3, 2, 4, 3, 4, 0, 5, 4, 4,
|
||||
1, 4, 4, 4, 3, 3, 3, 4, 3, 5, 5, 2, 3, 3, 4, 1, 2, 5, 5, 3, 5,
|
||||
5, 2, 3, 5, 5, 4, 0, 3, 2, 0, 3, 3, 1, 1, 5, 1, 4, 1, 0, 4, 3,
|
||||
2, 3, 5, 0, 4, 0, 3, 0, 5, 4, 3, 4, 3, 0, 0, 4, 1, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
1, 3, 0, 4, 0, 2, 0, 2, 0, 2, 5, 5, 3, 3, 3, 3, 3, 0, 4, 2, 3,
|
||||
4, 4, 4, 3, 4, 0, 0, 3, 4, 5, 4, 3, 3, 3, 3, 2, 5, 5, 4, 5, 5,
|
||||
5, 4, 3, 5, 5, 5, 1, 3, 1, 0, 1, 0, 0, 3, 2, 0, 4, 2, 0, 5, 2,
|
||||
3, 2, 4, 1, 3, 0, 3, 0, 4, 5, 4, 5, 4, 3, 0, 4, 2, 0, 5, 4,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 4, 0, 5, 0, 3, 0, 3, 4, 4, 3, 2, 3, 2, 3, 3, 3, 3, 3,
|
||||
2, 4, 3, 3, 2, 2, 0, 3, 3, 3, 3, 3, 1, 3, 3, 3, 0, 4, 4, 3, 4,
|
||||
4, 1, 1, 4, 4, 2, 0, 3, 1, 0, 1, 1, 0, 4, 1, 0, 2, 3, 1, 3, 3,
|
||||
1, 3, 4, 0, 3, 0, 1, 0, 3, 1, 3, 0, 0, 1, 0, 2, 0, 0, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
},
|
||||
{
|
||||
0, 3, 0, 3, 0, 2, 0, 3, 0, 1, 5, 4, 3, 3, 3, 1, 4, 2, 1, 2, 3,
|
||||
4, 4, 2, 4, 4, 5, 0, 3, 1, 4, 3, 4, 0, 4, 3, 3, 3, 2, 3, 2, 5,
|
||||
3, 4, 3, 2, 2, 3, 0, 0, 3, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1,
|
||||
1, 3, 1, 0, 2, 0, 4, 0, 3, 4, 4, 4, 5, 2, 0, 2, 0, 0, 1, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
|
||||
1, 1, 0, 0, 1, 1, 0, 0, 0, 4, 2, 1, 1, 0, 1, 0, 3, 2, 0, 0, 3,
|
||||
1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
|
||||
1, 0, 0, 0, 2, 0, 0, 0, 1, 4, 0, 4, 2, 1, 0, 0, 0, 0, 0, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 1,
|
||||
2, 1, 0, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3,
|
||||
1, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 4, 0, 4, 0, 3, 0, 4, 4, 3, 4, 2, 4, 3, 2, 0, 4, 4, 4,
|
||||
3, 5, 3, 5, 3, 3, 2, 4, 2, 4, 3, 4, 3, 1, 4, 0, 2, 3, 4, 4, 4,
|
||||
3, 3, 3, 4, 4, 4, 3, 4, 1, 3, 4, 3, 2, 1, 2, 1, 3, 3, 3, 4, 4,
|
||||
3, 3, 5, 0, 4, 0, 3, 0, 4, 3, 3, 3, 2, 1, 0, 3, 0, 0, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 4, 0, 3, 0, 3, 0, 3, 0, 3, 5, 5, 3, 3, 3, 3, 4, 3, 4, 3, 3,
|
||||
3, 4, 4, 4, 3, 3, 3, 3, 4, 3, 5, 3, 3, 1, 3, 2, 4, 5, 5, 5, 5,
|
||||
4, 3, 4, 5, 5, 3, 2, 2, 3, 3, 3, 3, 2, 3, 3, 1, 2, 3, 2, 4, 3,
|
||||
3, 3, 4, 0, 4, 0, 2, 0, 4, 3, 2, 2, 1, 2, 0, 3, 0, 0, 4, 1,
|
||||
},
|
||||
};
|
||||
|
||||
#define MINIMUM_DATA_THRESHOLD 4
|
||||
|
||||
void JapaneseContextAnalysis::HandleData(const char* aBuf, uint32_t aLen) {
|
||||
uint32_t charLen;
|
||||
int32_t order;
|
||||
uint32_t i;
|
||||
|
||||
if (mDone) return;
|
||||
|
||||
// The buffer we got is byte oriented, and a character may span in more than
|
||||
// one buffers. In case the last one or two byte in last buffer is not
|
||||
// complete, we record how many byte needed to complete that character and
|
||||
// skip these bytes here. We can choose to record those bytes as well and
|
||||
// analyse the character once it is complete, but since a character will not
|
||||
// make much difference, by simply skipping this character will simply our
|
||||
// logic and improve performance.
|
||||
for (i = mNeedToSkipCharNum; i < aLen;) {
|
||||
order = GetOrder(aBuf + i, &charLen);
|
||||
i += charLen;
|
||||
if (i > aLen) {
|
||||
mNeedToSkipCharNum = i - aLen;
|
||||
mLastCharOrder = -1;
|
||||
} else {
|
||||
if (order != -1 && mLastCharOrder != -1) {
|
||||
mTotalRel++;
|
||||
if (mTotalRel > MAX_REL_THRESHOLD) {
|
||||
mDone = true;
|
||||
break;
|
||||
}
|
||||
mRelSample[jp2CharContext[mLastCharOrder][order]]++;
|
||||
}
|
||||
mLastCharOrder = order;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void JapaneseContextAnalysis::Reset() {
|
||||
mTotalRel = 0;
|
||||
for (uint32_t i = 0; i < NUM_OF_CATEGORY; i++) mRelSample[i] = 0;
|
||||
mNeedToSkipCharNum = 0;
|
||||
mLastCharOrder = -1;
|
||||
mDone = false;
|
||||
mDataThreshold = 0;
|
||||
}
|
||||
#define DONT_KNOW (float)-1
|
||||
|
||||
float JapaneseContextAnalysis::GetConfidence(void) {
|
||||
// This is just one way to calculate confidence. It works well for me.
|
||||
if (mTotalRel > mDataThreshold)
|
||||
return ((float)(mTotalRel - mRelSample[0])) / mTotalRel;
|
||||
else
|
||||
return (float)DONT_KNOW;
|
||||
}
|
||||
|
||||
int32_t SJISContextAnalysis::GetOrder(const char* str, uint32_t* charLen) {
|
||||
// find out current char's byte length
|
||||
if (((unsigned char)*str >= (unsigned char)0x81 &&
|
||||
(unsigned char)*str <= (unsigned char)0x9f) ||
|
||||
((unsigned char)*str >= (unsigned char)0xe0 &&
|
||||
(unsigned char)*str <= (unsigned char)0xfc))
|
||||
*charLen = 2;
|
||||
else
|
||||
*charLen = 1;
|
||||
|
||||
// return its order if it is hiragana
|
||||
if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f &&
|
||||
(unsigned char)*(str + 1) <= (unsigned char)0xf1)
|
||||
return (unsigned char)*(str + 1) - (unsigned char)0x9f;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t EUCJPContextAnalysis::GetOrder(const char* str, uint32_t* charLen) {
|
||||
// find out current char's byte length
|
||||
if ((unsigned char)*str == (unsigned char)0x8e ||
|
||||
((unsigned char)*str >= (unsigned char)0xa1 &&
|
||||
(unsigned char)*str <= (unsigned char)0xfe))
|
||||
*charLen = 2;
|
||||
else if ((unsigned char)*str == (unsigned char)0x8f)
|
||||
*charLen = 3;
|
||||
else
|
||||
*charLen = 1;
|
||||
|
||||
// return its order if it is hiragana
|
||||
if ((unsigned char)*str == (unsigned char)0xa4 &&
|
||||
(unsigned char)*(str + 1) >= (unsigned char)0xa1 &&
|
||||
(unsigned char)*(str + 1) <= (unsigned char)0xf3)
|
||||
return (unsigned char)*(str + 1) - (unsigned char)0xa1;
|
||||
return -1;
|
||||
}
|
|
@ -1,97 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef __JPCNTX_H__
|
||||
#define __JPCNTX_H__
|
||||
|
||||
#define NUM_OF_CATEGORY 6
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
#define ENOUGH_REL_THRESHOLD 100
|
||||
#define MAX_REL_THRESHOLD 1000
|
||||
|
||||
// hiragana frequency category table
|
||||
extern const uint8_t jp2CharContext[83][83];
|
||||
|
||||
class JapaneseContextAnalysis {
|
||||
public:
|
||||
JapaneseContextAnalysis() { Reset(); }
|
||||
|
||||
void HandleData(const char* aBuf, uint32_t aLen);
|
||||
|
||||
void HandleOneChar(const char* aStr, uint32_t aCharLen) {
|
||||
int32_t order;
|
||||
|
||||
// if we received enough data, stop here
|
||||
if (mTotalRel > MAX_REL_THRESHOLD) mDone = true;
|
||||
if (mDone) return;
|
||||
|
||||
// Only 2-bytes characters are of our interest
|
||||
order = (aCharLen == 2) ? GetOrder(aStr) : -1;
|
||||
if (order != -1 && mLastCharOrder != -1) {
|
||||
mTotalRel++;
|
||||
// count this sequence to its category counter
|
||||
mRelSample[jp2CharContext[mLastCharOrder][order]]++;
|
||||
}
|
||||
mLastCharOrder = order;
|
||||
}
|
||||
|
||||
float GetConfidence(void);
|
||||
void Reset();
|
||||
bool GotEnoughData() { return mTotalRel > ENOUGH_REL_THRESHOLD; }
|
||||
|
||||
protected:
|
||||
virtual int32_t GetOrder(const char* str, uint32_t* charLen) = 0;
|
||||
virtual int32_t GetOrder(const char* str) = 0;
|
||||
|
||||
// category counters, each integer counts sequences in its category
|
||||
uint32_t mRelSample[NUM_OF_CATEGORY];
|
||||
|
||||
// total sequence received
|
||||
uint32_t mTotalRel;
|
||||
|
||||
// Number of sequences needed to trigger detection
|
||||
uint32_t mDataThreshold;
|
||||
|
||||
// The order of previous char
|
||||
int32_t mLastCharOrder;
|
||||
|
||||
// if last byte in current buffer is not the last byte of a character, we
|
||||
// need to know how many byte to skip in next buffer.
|
||||
uint32_t mNeedToSkipCharNum;
|
||||
|
||||
// If this flag is set to true, detection is done and conclusion has been made
|
||||
bool mDone;
|
||||
};
|
||||
|
||||
class SJISContextAnalysis : public JapaneseContextAnalysis {
|
||||
// SJISContextAnalysis(){};
|
||||
protected:
|
||||
int32_t GetOrder(const char* str, uint32_t* charLen) override;
|
||||
|
||||
int32_t GetOrder(const char* str) override {
|
||||
// We only interested in Hiragana, so first byte is '\202'
|
||||
if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f &&
|
||||
(unsigned char)*(str + 1) <= (unsigned char)0xf1)
|
||||
return (unsigned char)*(str + 1) - (unsigned char)0x9f;
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
class EUCJPContextAnalysis : public JapaneseContextAnalysis {
|
||||
protected:
|
||||
int32_t GetOrder(const char* str, uint32_t* charLen) override;
|
||||
int32_t GetOrder(const char* str) override
|
||||
// We only interested in Hiragana, so first byte is '\244'
|
||||
{
|
||||
if (*str == '\244' && (unsigned char)*(str + 1) >= (unsigned char)0xa1 &&
|
||||
(unsigned char)*(str + 1) <= (unsigned char)0xf3)
|
||||
return (unsigned char)*(str + 1) - (unsigned char)0xa1;
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
#endif /* __JPCNTX_H__ */
|
|
@ -1,21 +0,0 @@
|
|||
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
||||
# vim: set filetype=python:
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
UNIFIED_SOURCES += [
|
||||
'CharDistribution.cpp',
|
||||
'JpCntx.cpp',
|
||||
'nsCharSetProber.cpp',
|
||||
'nsEscCharsetProber.cpp',
|
||||
'nsEscSM.cpp',
|
||||
'nsEUCJPProber.cpp',
|
||||
'nsMBCSGroupProber.cpp',
|
||||
'nsMBCSSM.cpp',
|
||||
'nsSJISProber.cpp',
|
||||
'nsUniversalDetector.cpp',
|
||||
'nsUTF8Prober.cpp',
|
||||
]
|
||||
|
||||
FINAL_LIBRARY = 'xul'
|
|
@ -1,88 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
|
||||
// This filter applies to all scripts which do not use English characters
|
||||
bool nsCharSetProber::FilterWithoutEnglishLetters(const char* aBuf,
|
||||
uint32_t aLen, char** newBuf,
|
||||
uint32_t& newLen) {
|
||||
char* newptr;
|
||||
char *prevPtr, *curPtr;
|
||||
|
||||
bool meetMSB = false;
|
||||
newptr = *newBuf = (char*)malloc(aLen);
|
||||
if (!newptr) return false;
|
||||
|
||||
for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf + aLen; curPtr++) {
|
||||
if (*curPtr & 0x80) {
|
||||
meetMSB = true;
|
||||
} else if (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') ||
|
||||
*curPtr > 'z') {
|
||||
// current char is a symbol, most likely a punctuation. we treat it as
|
||||
// segment delimiter
|
||||
if (meetMSB && curPtr > prevPtr)
|
||||
// this segment contains more than single symbol, and it has upper ASCII,
|
||||
// we need to keep it
|
||||
{
|
||||
while (prevPtr < curPtr) *newptr++ = *prevPtr++;
|
||||
prevPtr++;
|
||||
*newptr++ = ' ';
|
||||
meetMSB = false;
|
||||
} else // ignore current segment. (either because it is just a symbol or
|
||||
// just an English word)
|
||||
prevPtr = curPtr + 1;
|
||||
}
|
||||
}
|
||||
if (meetMSB && curPtr > prevPtr)
|
||||
while (prevPtr < curPtr) *newptr++ = *prevPtr++;
|
||||
|
||||
newLen = newptr - *newBuf;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// This filter applies to all scripts which contain both English characters and
|
||||
// upper ASCII characters.
|
||||
bool nsCharSetProber::FilterWithEnglishLetters(const char* aBuf, uint32_t aLen,
|
||||
char** newBuf,
|
||||
uint32_t& newLen) {
|
||||
// do filtering to reduce load to probers
|
||||
char* newptr;
|
||||
char *prevPtr, *curPtr;
|
||||
bool isInTag = false;
|
||||
|
||||
newptr = *newBuf = (char*)malloc(aLen);
|
||||
if (!newptr) return false;
|
||||
|
||||
for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf + aLen; curPtr++) {
|
||||
if (*curPtr == '>')
|
||||
isInTag = false;
|
||||
else if (*curPtr == '<')
|
||||
isInTag = true;
|
||||
|
||||
if (!(*curPtr & 0x80) &&
|
||||
(*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z')) {
|
||||
if (curPtr > prevPtr &&
|
||||
!isInTag) // Current segment contains more than just a symbol
|
||||
// and it is not inside a tag, keep it.
|
||||
{
|
||||
while (prevPtr < curPtr) *newptr++ = *prevPtr++;
|
||||
prevPtr++;
|
||||
*newptr++ = ' ';
|
||||
} else
|
||||
prevPtr = curPtr + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// If the current segment contains more than just a symbol
|
||||
// and it is not inside a tag then keep it.
|
||||
if (!isInTag)
|
||||
while (prevPtr < curPtr) *newptr++ = *prevPtr++;
|
||||
|
||||
newLen = newptr - *newBuf;
|
||||
|
||||
return true;
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#ifndef nsCharSetProber_h__
|
||||
#define nsCharSetProber_h__
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
//#define DEBUG_chardet // Uncomment this for debug dump.
|
||||
|
||||
typedef enum {
|
||||
eDetecting = 0, // We are still detecting, no sure answer yet, but caller can
|
||||
// ask for confidence.
|
||||
eFoundIt = 1, // That's a positive answer
|
||||
eNotMe = 2 // Negative answer
|
||||
} nsProbingState;
|
||||
|
||||
#define SHORTCUT_THRESHOLD (float)0.95
|
||||
|
||||
class nsCharSetProber {
|
||||
public:
|
||||
virtual ~nsCharSetProber() {}
|
||||
virtual const char* GetCharSetName() = 0;
|
||||
virtual nsProbingState HandleData(const char* aBuf, uint32_t aLen) = 0;
|
||||
virtual nsProbingState GetState(void) = 0;
|
||||
virtual void Reset(void) = 0;
|
||||
virtual float GetConfidence(void) = 0;
|
||||
|
||||
#ifdef DEBUG_chardet
|
||||
virtual void DumpStatus(){};
|
||||
#endif
|
||||
|
||||
// Helper functions used in the Latin1 and Group probers.
|
||||
// both functions Allocate a new buffer for newBuf. This buffer should be
|
||||
// freed by the caller using free().
|
||||
// Both functions return false in case of memory allocation failure.
|
||||
static bool FilterWithoutEnglishLetters(const char* aBuf, uint32_t aLen,
|
||||
char** newBuf, uint32_t& newLen);
|
||||
static bool FilterWithEnglishLetters(const char* aBuf, uint32_t aLen,
|
||||
char** newBuf, uint32_t& newLen);
|
||||
};
|
||||
|
||||
#endif /* nsCharSetProber_h__ */
|
|
@ -1,85 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#ifndef nsCodingStateMachine_h__
|
||||
#define nsCodingStateMachine_h__
|
||||
|
||||
#include "mozilla/ArrayUtils.h"
|
||||
|
||||
#include "nsPkgInt.h"
|
||||
|
||||
/* Apart from these 3 generic states, machine states are specific to
|
||||
* each charset prober.
|
||||
*/
|
||||
#define eStart 0
|
||||
#define eError 1
|
||||
#define eItsMe 2
|
||||
|
||||
#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
|
||||
|
||||
// state machine model
|
||||
typedef struct {
|
||||
nsPkgInt classTable;
|
||||
uint32_t classFactor;
|
||||
nsPkgInt stateTable;
|
||||
const uint32_t* charLenTable;
|
||||
#ifdef DEBUG
|
||||
const size_t charLenTableLength;
|
||||
#endif
|
||||
const char* name;
|
||||
} SMModel;
|
||||
|
||||
class nsCodingStateMachine {
|
||||
public:
|
||||
explicit nsCodingStateMachine(const SMModel* sm) : mModel(sm) {
|
||||
mCurrentState = eStart;
|
||||
}
|
||||
uint32_t NextState(char c) {
|
||||
// for each byte we get its class , if it is first byte, we also get byte
|
||||
// length
|
||||
uint32_t byteCls = GETCLASS(c);
|
||||
if (mCurrentState == eStart) {
|
||||
mCurrentBytePos = 0;
|
||||
MOZ_ASSERT(byteCls < mModel->charLenTableLength);
|
||||
mCurrentCharLen = mModel->charLenTable[byteCls];
|
||||
}
|
||||
// from byte's class and stateTable, we get its next state
|
||||
mCurrentState = GETFROMPCK(mCurrentState * mModel->classFactor + byteCls,
|
||||
mModel->stateTable);
|
||||
mCurrentBytePos++;
|
||||
return mCurrentState;
|
||||
}
|
||||
uint32_t GetCurrentCharLen(void) { return mCurrentCharLen; }
|
||||
void Reset(void) { mCurrentState = eStart; }
|
||||
const char* GetCodingStateMachine() { return mModel->name; }
|
||||
|
||||
protected:
|
||||
uint32_t mCurrentState;
|
||||
uint32_t mCurrentCharLen;
|
||||
uint32_t mCurrentBytePos;
|
||||
|
||||
const SMModel* mModel;
|
||||
};
|
||||
|
||||
extern const SMModel UTF8SMModel;
|
||||
extern const SMModel Big5SMModel;
|
||||
extern const SMModel EUCJPSMModel;
|
||||
extern const SMModel EUCKRSMModel;
|
||||
extern const SMModel EUCTWSMModel;
|
||||
extern const SMModel GB18030SMModel;
|
||||
extern const SMModel SJISSMModel;
|
||||
|
||||
extern const SMModel HZSMModel;
|
||||
extern const SMModel ISO2022CNSMModel;
|
||||
extern const SMModel ISO2022JPSMModel;
|
||||
extern const SMModel ISO2022KRSMModel;
|
||||
|
||||
#undef CHAR_LEN_TABLE
|
||||
#ifdef DEBUG
|
||||
# define CHAR_LEN_TABLE(x) x, mozilla::ArrayLength(x)
|
||||
#else
|
||||
# define CHAR_LEN_TABLE(x) x
|
||||
#endif
|
||||
|
||||
#endif /* nsCodingStateMachine_h__ */
|
|
@ -1,60 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// for japanese encoding, obeserve characteristic:
|
||||
// 1, kana character (or hankaku?) often have hight frequency of appereance
|
||||
// 2, kana character often exist in group
|
||||
// 3, certain combination of kana is never used in japanese language
|
||||
|
||||
#include "nsEUCJPProber.h"
|
||||
#include "nsDebug.h"
|
||||
|
||||
void nsEUCJPProber::Reset(void) {
|
||||
mCodingSM->Reset();
|
||||
mState = eDetecting;
|
||||
mContextAnalyser.Reset();
|
||||
mDistributionAnalyser.Reset();
|
||||
}
|
||||
|
||||
nsProbingState nsEUCJPProber::HandleData(const char* aBuf, uint32_t aLen) {
|
||||
NS_ASSERTION(aLen, "HandleData called with empty buffer");
|
||||
uint32_t codingState;
|
||||
|
||||
for (uint32_t i = 0; i < aLen; i++) {
|
||||
codingState = mCodingSM->NextState(aBuf[i]);
|
||||
if (codingState == eItsMe) {
|
||||
mState = eFoundIt;
|
||||
break;
|
||||
}
|
||||
if (codingState == eStart) {
|
||||
uint32_t charLen = mCodingSM->GetCurrentCharLen();
|
||||
|
||||
if (i == 0) {
|
||||
mLastChar[1] = aBuf[0];
|
||||
mContextAnalyser.HandleOneChar(mLastChar, charLen);
|
||||
mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
|
||||
} else {
|
||||
mContextAnalyser.HandleOneChar(aBuf + i - 1, charLen);
|
||||
mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mLastChar[0] = aBuf[aLen - 1];
|
||||
|
||||
if (mState == eDetecting)
|
||||
if (mContextAnalyser.GotEnoughData() &&
|
||||
GetConfidence() > SHORTCUT_THRESHOLD)
|
||||
mState = eFoundIt;
|
||||
|
||||
return mState;
|
||||
}
|
||||
|
||||
float nsEUCJPProber::GetConfidence(void) {
|
||||
float contxtCf = mContextAnalyser.GetConfidence();
|
||||
float distribCf = mDistributionAnalyser.GetConfidence();
|
||||
|
||||
return (contxtCf > distribCf ? contxtCf : distribCf);
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// for S-JIS encoding, obeserve characteristic:
|
||||
// 1, kana character (or hankaku?) often have hight frequency of appereance
|
||||
// 2, kana character often exist in group
|
||||
// 3, certain combination of kana is never used in japanese language
|
||||
|
||||
#ifndef nsEUCJPProber_h__
|
||||
#define nsEUCJPProber_h__
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
#include "nsCodingStateMachine.h"
|
||||
#include "JpCntx.h"
|
||||
#include "CharDistribution.h"
|
||||
|
||||
class nsEUCJPProber : public nsCharSetProber {
|
||||
public:
|
||||
nsEUCJPProber() {
|
||||
mCodingSM = new nsCodingStateMachine(&EUCJPSMModel);
|
||||
Reset();
|
||||
}
|
||||
virtual ~nsEUCJPProber(void) { delete mCodingSM; }
|
||||
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
|
||||
const char* GetCharSetName() override { return "EUC-JP"; }
|
||||
nsProbingState GetState(void) override { return mState; }
|
||||
void Reset(void) override;
|
||||
float GetConfidence(void) override;
|
||||
|
||||
protected:
|
||||
nsCodingStateMachine* mCodingSM;
|
||||
nsProbingState mState;
|
||||
|
||||
EUCJPContextAnalysis mContextAnalyser;
|
||||
EUCJPDistributionAnalysis mDistributionAnalyser;
|
||||
|
||||
char mLastChar[2];
|
||||
};
|
||||
|
||||
#endif /* nsEUCJPProber_h__ */
|
|
@ -1,37 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsEscCharsetProber.h"
|
||||
#include "nsUniversalDetector.h"
|
||||
|
||||
nsEscCharSetProber::nsEscCharSetProber() {
|
||||
mCodingSM = new nsCodingStateMachine(&ISO2022JPSMModel);
|
||||
mState = eDetecting;
|
||||
mDetectedCharset = nullptr;
|
||||
}
|
||||
|
||||
nsEscCharSetProber::~nsEscCharSetProber(void) {}
|
||||
|
||||
void nsEscCharSetProber::Reset(void) {
|
||||
mState = eDetecting;
|
||||
mCodingSM->Reset();
|
||||
mDetectedCharset = nullptr;
|
||||
}
|
||||
|
||||
nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, uint32_t aLen) {
|
||||
uint32_t codingState;
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < aLen && mState == eDetecting; i++) {
|
||||
codingState = mCodingSM->NextState(aBuf[i]);
|
||||
if (codingState == eItsMe) {
|
||||
mState = eFoundIt;
|
||||
mDetectedCharset = mCodingSM->GetCodingStateMachine();
|
||||
return mState;
|
||||
}
|
||||
}
|
||||
|
||||
return mState;
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsEscCharSetProber_h__
|
||||
#define nsEscCharSetProber_h__
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
#include "nsCodingStateMachine.h"
|
||||
#include "nsAutoPtr.h"
|
||||
|
||||
class nsEscCharSetProber : public nsCharSetProber {
|
||||
public:
|
||||
nsEscCharSetProber();
|
||||
virtual ~nsEscCharSetProber(void);
|
||||
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
|
||||
const char* GetCharSetName() override { return mDetectedCharset; }
|
||||
nsProbingState GetState(void) override { return mState; }
|
||||
void Reset(void) override;
|
||||
float GetConfidence(void) override { return (float)0.99; }
|
||||
|
||||
protected:
|
||||
void GetDistribution(uint32_t aCharLen, const char* aStr);
|
||||
|
||||
nsAutoPtr<nsCodingStateMachine> mCodingSM;
|
||||
nsProbingState mState;
|
||||
const char* mDetectedCharset;
|
||||
};
|
||||
|
||||
#endif /* nsEscCharSetProber_h__ */
|
|
@ -1,70 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#include "nsCodingStateMachine.h"
|
||||
|
||||
static const uint32_t ISO2022JP_cls[256 / 8] = {
|
||||
PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 2, 2), // 08 - 0f
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
|
||||
PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
|
||||
PCK4BITS(0, 0, 0, 0, 7, 0, 0, 0), // 20 - 27
|
||||
PCK4BITS(3, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
|
||||
PCK4BITS(6, 0, 4, 0, 8, 0, 0, 0), // 40 - 47
|
||||
PCK4BITS(0, 9, 5, 0, 0, 0, 0, 0), // 48 - 4f
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff
|
||||
};
|
||||
|
||||
static const uint32_t ISO2022JP_st[9] = {
|
||||
PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart,
|
||||
eStart), // 00-07
|
||||
PCK4BITS(eStart, eStart, eError, eError, eError, eError, eError,
|
||||
eError), // 08-0f
|
||||
PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe,
|
||||
eItsMe), // 10-17
|
||||
PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError,
|
||||
eError), // 18-1f
|
||||
PCK4BITS(eError, 5, eError, eError, eError, 4, eError, eError), // 20-27
|
||||
PCK4BITS(eError, eError, eError, 6, eItsMe, eError, eItsMe,
|
||||
eError), // 28-2f
|
||||
PCK4BITS(eError, eError, eError, eError, eError, eError, eItsMe,
|
||||
eItsMe), // 30-37
|
||||
PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError,
|
||||
eError), // 38-3f
|
||||
PCK4BITS(eError, eError, eError, eError, eItsMe, eError, eStart,
|
||||
eStart) // 40-47
|
||||
};
|
||||
|
||||
static const uint32_t ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
const SMModel ISO2022JPSMModel = {
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls},
|
||||
10,
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st},
|
||||
CHAR_LEN_TABLE(ISO2022JPCharLenTable),
|
||||
"ISO-2022-JP",
|
||||
};
|
|
@ -1,149 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#include <stdio.h>
|
||||
|
||||
#include "nsMBCSGroupProber.h"
|
||||
#include "nsUniversalDetector.h"
|
||||
|
||||
#if defined(DEBUG_chardet) || defined(DEBUG_jgmyers)
|
||||
const char* ProberName[] = {
|
||||
"UTF8",
|
||||
"SJIS",
|
||||
"EUCJP",
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
nsMBCSGroupProber::nsMBCSGroupProber() {
|
||||
mProbers[0] = new nsUTF8Prober();
|
||||
mProbers[1] = new nsSJISProber();
|
||||
mProbers[2] = new nsEUCJPProber();
|
||||
Reset();
|
||||
}
|
||||
|
||||
nsMBCSGroupProber::~nsMBCSGroupProber() {
|
||||
for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) {
|
||||
delete mProbers[i];
|
||||
}
|
||||
}
|
||||
|
||||
const char* nsMBCSGroupProber::GetCharSetName() {
|
||||
if (mBestGuess == -1) {
|
||||
GetConfidence();
|
||||
if (mBestGuess == -1) mBestGuess = 0;
|
||||
}
|
||||
return mProbers[mBestGuess]->GetCharSetName();
|
||||
}
|
||||
|
||||
void nsMBCSGroupProber::Reset(void) {
|
||||
mActiveNum = 0;
|
||||
for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) {
|
||||
if (mProbers[i]) {
|
||||
mProbers[i]->Reset();
|
||||
mIsActive[i] = true;
|
||||
++mActiveNum;
|
||||
} else
|
||||
mIsActive[i] = false;
|
||||
}
|
||||
mBestGuess = -1;
|
||||
mState = eDetecting;
|
||||
mKeepNext = 0;
|
||||
}
|
||||
|
||||
nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen) {
|
||||
nsProbingState st;
|
||||
uint32_t start = 0;
|
||||
uint32_t keepNext = mKeepNext;
|
||||
|
||||
// do filtering to reduce load to probers
|
||||
for (uint32_t pos = 0; pos < aLen; ++pos) {
|
||||
if (aBuf[pos] & 0x80) {
|
||||
if (!keepNext) start = pos;
|
||||
keepNext = 2;
|
||||
} else if (keepNext) {
|
||||
if (--keepNext == 0) {
|
||||
for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) {
|
||||
if (!mIsActive[i]) continue;
|
||||
st = mProbers[i]->HandleData(aBuf + start, pos + 1 - start);
|
||||
if (st == eFoundIt) {
|
||||
mBestGuess = i;
|
||||
mState = eFoundIt;
|
||||
return mState;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (keepNext) {
|
||||
for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) {
|
||||
if (!mIsActive[i]) continue;
|
||||
st = mProbers[i]->HandleData(aBuf + start, aLen - start);
|
||||
if (st == eFoundIt) {
|
||||
mBestGuess = i;
|
||||
mState = eFoundIt;
|
||||
return mState;
|
||||
}
|
||||
}
|
||||
}
|
||||
mKeepNext = keepNext;
|
||||
|
||||
return mState;
|
||||
}
|
||||
|
||||
float nsMBCSGroupProber::GetConfidence(void) {
|
||||
uint32_t i;
|
||||
float bestConf = 0.0, cf;
|
||||
|
||||
switch (mState) {
|
||||
case eFoundIt:
|
||||
return (float)0.99;
|
||||
case eNotMe:
|
||||
return (float)0.01;
|
||||
default:
|
||||
for (i = 0; i < NUM_OF_PROBERS; i++) {
|
||||
if (!mIsActive[i]) continue;
|
||||
cf = mProbers[i]->GetConfidence();
|
||||
if (bestConf < cf) {
|
||||
bestConf = cf;
|
||||
mBestGuess = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bestConf;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_chardet
|
||||
void nsMBCSGroupProber::DumpStatus() {
|
||||
uint32_t i;
|
||||
float cf;
|
||||
|
||||
GetConfidence();
|
||||
for (i = 0; i < NUM_OF_PROBERS; i++) {
|
||||
if (!mIsActive[i])
|
||||
printf(" MBCS inactive: [%s] (confidence is too low).\r\n",
|
||||
ProberName[i]);
|
||||
else {
|
||||
cf = mProbers[i]->GetConfidence();
|
||||
printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_jgmyers
|
||||
void nsMBCSGroupProber::GetDetectorState(
|
||||
nsUniversalDetector::DetectorState (
|
||||
&states)[nsUniversalDetector::NumDetectors],
|
||||
uint32_t& offset) {
|
||||
for (uint32_t i = 0; i < NUM_OF_PROBERS; ++i) {
|
||||
states[offset].name = ProberName[i];
|
||||
states[offset].isActive = mIsActive[i];
|
||||
states[offset].confidence =
|
||||
mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0;
|
||||
++offset;
|
||||
}
|
||||
}
|
||||
#endif /* DEBUG_jgmyers */
|
|
@ -1,43 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsMBCSGroupProber_h__
|
||||
#define nsMBCSGroupProber_h__
|
||||
|
||||
#include "nsSJISProber.h"
|
||||
#include "nsUTF8Prober.h"
|
||||
#include "nsEUCJPProber.h"
|
||||
|
||||
#define NUM_OF_PROBERS 3
|
||||
|
||||
class nsMBCSGroupProber : public nsCharSetProber {
|
||||
public:
|
||||
nsMBCSGroupProber();
|
||||
virtual ~nsMBCSGroupProber();
|
||||
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
|
||||
const char* GetCharSetName() override;
|
||||
nsProbingState GetState(void) override { return mState; }
|
||||
void Reset(void) override;
|
||||
float GetConfidence(void) override;
|
||||
|
||||
#ifdef DEBUG_chardet
|
||||
void DumpStatus();
|
||||
#endif
|
||||
#ifdef DEBUG_jgmyers
|
||||
void GetDetectorState(nsUniversalDetector::DetectorState (
|
||||
&states)[nsUniversalDetector::NumDetectors],
|
||||
uint32_t& offset);
|
||||
#endif
|
||||
|
||||
protected:
|
||||
nsProbingState mState;
|
||||
nsCharSetProber* mProbers[NUM_OF_PROBERS];
|
||||
bool mIsActive[NUM_OF_PROBERS];
|
||||
int32_t mBestGuess;
|
||||
uint32_t mActiveNum;
|
||||
uint32_t mKeepNext;
|
||||
};
|
||||
|
||||
#endif /* nsMBCSGroupProber_h__ */
|
|
@ -1,200 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
#include "nsCodingStateMachine.h"
|
||||
|
||||
/*
|
||||
Modification from frank tang's original work:
|
||||
. 0x00 is allowed as a legal character. Since some web pages contains this char
|
||||
in text stream.
|
||||
*/
|
||||
|
||||
static const uint32_t EUCJP_cls[256 / 8] = {
|
||||
// PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 00 - 07
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 5, 5), // 08 - 0f
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 10 - 17
|
||||
PCK4BITS(4, 4, 4, 5, 4, 4, 4, 4), // 18 - 1f
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 20 - 27
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 28 - 2f
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 30 - 37
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 38 - 3f
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 40 - 47
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 48 - 4f
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 50 - 57
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 58 - 5f
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 60 - 67
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 68 - 6f
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 70 - 77
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 78 - 7f
|
||||
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 80 - 87
|
||||
PCK4BITS(5, 5, 5, 5, 5, 5, 1, 3), // 88 - 8f
|
||||
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 90 - 97
|
||||
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 98 - 9f
|
||||
PCK4BITS(5, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e0 - e7
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e8 - ef
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // f0 - f7
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 5) // f8 - ff
|
||||
};
|
||||
|
||||
static const uint32_t EUCJP_st[5] = {
|
||||
PCK4BITS(3, 4, 3, 5, eStart, eError, eError, eError), // 00-07
|
||||
PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe,
|
||||
eItsMe), // 08-0f
|
||||
PCK4BITS(eItsMe, eItsMe, eStart, eError, eStart, eError, eError,
|
||||
eError), // 10-17
|
||||
PCK4BITS(eError, eError, eStart, eError, eError, eError, 3,
|
||||
eError), // 18-1f
|
||||
PCK4BITS(3, eError, eError, eError, eStart, eStart, eStart,
|
||||
eStart) // 20-27
|
||||
};
|
||||
|
||||
static const uint32_t EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
|
||||
|
||||
const SMModel EUCJPSMModel = {
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls},
|
||||
6,
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st},
|
||||
CHAR_LEN_TABLE(EUCJPCharLenTable),
|
||||
"EUC-JP",
|
||||
};
|
||||
|
||||
// sjis
|
||||
|
||||
static const uint32_t SJIS_cls[256 / 8] = {
|
||||
// PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
|
||||
PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 40 - 47
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 48 - 4f
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 50 - 57
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 58 - 5f
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 60 - 67
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 68 - 6f
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 70 - 77
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 1), // 78 - 7f
|
||||
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 80 - 87
|
||||
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 88 - 8f
|
||||
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97
|
||||
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f
|
||||
// 0xa0 is illegal in sjis encoding, but some pages does
|
||||
// contain such byte. We need to be more error forgiven.
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
|
||||
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // e0 - e7
|
||||
PCK4BITS(3, 3, 3, 3, 3, 4, 4, 4), // e8 - ef
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // f0 - f7
|
||||
PCK4BITS(4, 4, 4, 4, 4, 0, 0, 0) // f8 - ff
|
||||
};
|
||||
|
||||
static const uint32_t SJIS_st[3] = {
|
||||
PCK4BITS(eError, eStart, eStart, 3, eError, eError, eError,
|
||||
eError), // 00-07
|
||||
PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe,
|
||||
eItsMe), // 08-0f
|
||||
PCK4BITS(eItsMe, eItsMe, eError, eError, eStart, eStart, eStart,
|
||||
eStart) // 10-17
|
||||
};
|
||||
|
||||
static const uint32_t SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
|
||||
|
||||
const SMModel SJISSMModel = {
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls},
|
||||
6,
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st},
|
||||
CHAR_LEN_TABLE(SJISCharLenTable),
|
||||
"Shift_JIS",
|
||||
};
|
||||
|
||||
static const uint32_t UTF8_cls[256 / 8] = {
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
|
||||
PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 40 - 47
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 48 - 4f
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 50 - 57
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 58 - 5f
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 60 - 67
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 68 - 6f
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 70 - 77
|
||||
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 78 - 7f
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
|
||||
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
|
||||
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97
|
||||
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // a0 - a7
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // a8 - af
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // b0 - b7
|
||||
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // b8 - bf
|
||||
PCK4BITS(0, 0, 5, 5, 5, 5, 5, 5), // c0 - c7
|
||||
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // c8 - cf
|
||||
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // d0 - d7
|
||||
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // d8 - df
|
||||
PCK4BITS(6, 7, 7, 7, 7, 7, 7, 7), // e0 - e7
|
||||
PCK4BITS(7, 7, 7, 7, 7, 8, 7, 7), // e8 - ef
|
||||
PCK4BITS(9, 10, 10, 10, 11, 0, 0, 0), // f0 - f7
|
||||
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0) // f8 - ff
|
||||
};
|
||||
|
||||
static const uint32_t UTF8_st[15] = {
|
||||
PCK4BITS(eError, eStart, eError, eError, eError, 3, 4, 5), // 00 - 07
|
||||
PCK4BITS(6, 7, 8, 9, eError, eError, eError, eError), // 08 - 0f
|
||||
PCK4BITS(eError, eError, eError, eError, eError, eError, eError,
|
||||
eError), // 10 - 17
|
||||
PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,
|
||||
eItsMe), // 18 - 1f
|
||||
PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart,
|
||||
eStart), // 20 - 27
|
||||
PCK4BITS(eStart, eError, eError, eError, eError, eError, eError,
|
||||
eError), // 28 - 2f
|
||||
PCK4BITS(eError, eError, eError, eError, 3, eError, eError,
|
||||
eError), // 30 - 37
|
||||
PCK4BITS(eError, eError, eError, eError, eError, eError, 3, 3), // 38 - 3f
|
||||
PCK4BITS(3, eError, eError, eError, eError, eError, eError,
|
||||
eError), // 40 - 47
|
||||
PCK4BITS(eError, eError, 3, 3, eError, eError, eError, eError), // 48 - 4f
|
||||
PCK4BITS(eError, eError, eError, eError, eError, eError, 5, 5), // 50 - 57
|
||||
PCK4BITS(eError, eError, eError, eError, eError, eError, eError,
|
||||
eError), // 58 - 5f
|
||||
PCK4BITS(eError, eError, 5, 5, 5, eError, eError, eError), // 60 - 67
|
||||
PCK4BITS(eError, eError, eError, eError, eError, eError, 5,
|
||||
eError), // 68 - 6f
|
||||
PCK4BITS(eError, eError, eError, eError, eError, eError, eError,
|
||||
eError) // 70 - 77
|
||||
};
|
||||
|
||||
static const uint32_t UTF8CharLenTable[] = {0, 1, 0, 0, 0, 2, 3, 3, 3, 4, 4, 4};
|
||||
|
||||
const SMModel UTF8SMModel = {
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls},
|
||||
12,
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st},
|
||||
CHAR_LEN_TABLE(UTF8CharLenTable),
|
||||
"UTF-8",
|
||||
};
|
|
@ -1,43 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsPkgInt_h__
|
||||
#define nsPkgInt_h__
|
||||
#include "nscore.h"
|
||||
|
||||
typedef enum { eIdxSft4bits = 3, eIdxSft8bits = 2, eIdxSft16bits = 1 } nsIdxSft;
|
||||
|
||||
typedef enum { eSftMsk4bits = 7, eSftMsk8bits = 3, eSftMsk16bits = 1 } nsSftMsk;
|
||||
|
||||
typedef enum { eBitSft4bits = 2, eBitSft8bits = 3, eBitSft16bits = 4 } nsBitSft;
|
||||
|
||||
typedef enum {
|
||||
eUnitMsk4bits = 0x0000000FL,
|
||||
eUnitMsk8bits = 0x000000FFL,
|
||||
eUnitMsk16bits = 0x0000FFFFL
|
||||
} nsUnitMsk;
|
||||
|
||||
typedef struct nsPkgInt {
|
||||
nsIdxSft idxsft;
|
||||
nsSftMsk sftmsk;
|
||||
nsBitSft bitsft;
|
||||
nsUnitMsk unitmsk;
|
||||
const uint32_t* const data;
|
||||
} nsPkgInt;
|
||||
|
||||
#define PCK16BITS(a, b) ((uint32_t)(((b) << 16) | (a)))
|
||||
|
||||
#define PCK8BITS(a, b, c, d) \
|
||||
PCK16BITS(((uint32_t)(((b) << 8) | (a))), ((uint32_t)(((d) << 8) | (c))))
|
||||
|
||||
#define PCK4BITS(a, b, c, d, e, f, g, h) \
|
||||
PCK8BITS(((uint32_t)(((b) << 4) | (a))), ((uint32_t)(((d) << 4) | (c))), \
|
||||
((uint32_t)(((f) << 4) | (e))), ((uint32_t)(((h) << 4) | (g))))
|
||||
|
||||
#define GETFROMPCK(i, c) \
|
||||
(((((c).data)[(i) >> (c).idxsft]) >> (((i) & (c).sftmsk) << (c).bitsft)) & \
|
||||
(c).unitmsk)
|
||||
|
||||
#endif /* nsPkgInt_h__ */
|
|
@ -1,59 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// for S-JIS encoding, obeserve characteristic:
|
||||
// 1, kana character (or hankaku?) often have hight frequency of appereance
|
||||
// 2, kana character often exist in group
|
||||
// 3, certain combination of kana is never used in japanese language
|
||||
|
||||
#include "nsSJISProber.h"
|
||||
#include "nsDebug.h"
|
||||
|
||||
void nsSJISProber::Reset(void) {
|
||||
mCodingSM->Reset();
|
||||
mState = eDetecting;
|
||||
mContextAnalyser.Reset();
|
||||
mDistributionAnalyser.Reset();
|
||||
}
|
||||
|
||||
nsProbingState nsSJISProber::HandleData(const char* aBuf, uint32_t aLen) {
|
||||
NS_ASSERTION(aLen, "HandleData called with empty buffer");
|
||||
uint32_t codingState;
|
||||
|
||||
for (uint32_t i = 0; i < aLen; i++) {
|
||||
codingState = mCodingSM->NextState(aBuf[i]);
|
||||
if (codingState == eItsMe) {
|
||||
mState = eFoundIt;
|
||||
break;
|
||||
}
|
||||
if (codingState == eStart) {
|
||||
uint32_t charLen = mCodingSM->GetCurrentCharLen();
|
||||
if (i == 0) {
|
||||
mLastChar[1] = aBuf[0];
|
||||
mContextAnalyser.HandleOneChar(mLastChar + 2 - charLen, charLen);
|
||||
mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
|
||||
} else {
|
||||
mContextAnalyser.HandleOneChar(aBuf + i + 1 - charLen, charLen);
|
||||
mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mLastChar[0] = aBuf[aLen - 1];
|
||||
|
||||
if (mState == eDetecting)
|
||||
if (mContextAnalyser.GotEnoughData() &&
|
||||
GetConfidence() > SHORTCUT_THRESHOLD)
|
||||
mState = eFoundIt;
|
||||
|
||||
return mState;
|
||||
}
|
||||
|
||||
float nsSJISProber::GetConfidence(void) {
|
||||
float contxtCf = mContextAnalyser.GetConfidence();
|
||||
float distribCf = mDistributionAnalyser.GetConfidence();
|
||||
|
||||
return (contxtCf > distribCf ? contxtCf : distribCf);
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// for S-JIS encoding, obeserve characteristic:
|
||||
// 1, kana character (or hankaku?) often have hight frequency of appereance
|
||||
// 2, kana character often exist in group
|
||||
// 3, certain combination of kana is never used in japanese language
|
||||
|
||||
#ifndef nsSJISProber_h__
|
||||
#define nsSJISProber_h__
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
#include "nsCodingStateMachine.h"
|
||||
#include "JpCntx.h"
|
||||
#include "CharDistribution.h"
|
||||
|
||||
class nsSJISProber : public nsCharSetProber {
|
||||
public:
|
||||
nsSJISProber() {
|
||||
mCodingSM = new nsCodingStateMachine(&SJISSMModel);
|
||||
Reset();
|
||||
}
|
||||
virtual ~nsSJISProber(void) { delete mCodingSM; }
|
||||
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
|
||||
const char* GetCharSetName() override { return "Shift_JIS"; }
|
||||
nsProbingState GetState(void) override { return mState; }
|
||||
void Reset(void) override;
|
||||
float GetConfidence(void) override;
|
||||
|
||||
protected:
|
||||
nsCodingStateMachine* mCodingSM;
|
||||
nsProbingState mState;
|
||||
|
||||
SJISContextAnalysis mContextAnalyser;
|
||||
SJISDistributionAnalysis mDistributionAnalyser;
|
||||
|
||||
char mLastChar[2];
|
||||
};
|
||||
|
||||
#endif /* nsSJISProber_h__ */
|
|
@ -1,43 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsUTF8Prober.h"
|
||||
|
||||
void nsUTF8Prober::Reset(void) {
|
||||
mCodingSM->Reset();
|
||||
mNumOfMBChar = 0;
|
||||
mState = eDetecting;
|
||||
}
|
||||
|
||||
nsProbingState nsUTF8Prober::HandleData(const char* aBuf, uint32_t aLen) {
|
||||
uint32_t codingState;
|
||||
|
||||
for (uint32_t i = 0; i < aLen; i++) {
|
||||
codingState = mCodingSM->NextState(aBuf[i]);
|
||||
if (codingState == eItsMe) {
|
||||
mState = eFoundIt;
|
||||
break;
|
||||
}
|
||||
if (codingState == eStart) {
|
||||
if (mCodingSM->GetCurrentCharLen() >= 2) mNumOfMBChar++;
|
||||
}
|
||||
}
|
||||
|
||||
if (mState == eDetecting)
|
||||
if (GetConfidence() > SHORTCUT_THRESHOLD) mState = eFoundIt;
|
||||
return mState;
|
||||
}
|
||||
|
||||
#define ONE_CHAR_PROB (float)0.50
|
||||
|
||||
float nsUTF8Prober::GetConfidence(void) {
|
||||
float unlike = (float)0.99;
|
||||
|
||||
if (mNumOfMBChar < 6) {
|
||||
for (uint32_t i = 0; i < mNumOfMBChar; i++) unlike *= ONE_CHAR_PROB;
|
||||
return (float)1.0 - unlike;
|
||||
} else
|
||||
return (float)0.99;
|
||||
}
|
|
@ -1,32 +0,0 @@
|
|||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsUTF8Prober_h__
|
||||
#define nsUTF8Prober_h__
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
#include "nsCodingStateMachine.h"
|
||||
|
||||
class nsUTF8Prober : public nsCharSetProber {
|
||||
public:
|
||||
nsUTF8Prober() {
|
||||
mNumOfMBChar = 0;
|
||||
mCodingSM = new nsCodingStateMachine(&UTF8SMModel);
|
||||
Reset();
|
||||
}
|
||||
virtual ~nsUTF8Prober() { delete mCodingSM; }
|
||||
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
|
||||
const char* GetCharSetName() override { return "UTF-8"; }
|
||||
nsProbingState GetState(void) override { return mState; }
|
||||
void Reset(void) override;
|
||||
float GetConfidence(void) override;
|
||||
|
||||
protected:
|
||||
nsCodingStateMachine* mCodingSM;
|
||||
nsProbingState mState;
|
||||
uint32_t mNumOfMBChar;
|
||||
};
|
||||
|
||||
#endif /* nsUTF8Prober_h__ */
|
|
@ -1,179 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
#include "nsUniversalDetector.h"
|
||||
|
||||
#include "nsMBCSGroupProber.h"
|
||||
#include "nsEscCharsetProber.h"
|
||||
|
||||
nsUniversalDetector::nsUniversalDetector() {
|
||||
mDone = false;
|
||||
mBestGuess = -1; // illegal value as signal
|
||||
mInTag = false;
|
||||
mMultibyteProber = nullptr;
|
||||
mEscCharSetProber = nullptr;
|
||||
|
||||
mStart = true;
|
||||
mDetectedCharset = nullptr;
|
||||
mGotData = false;
|
||||
mInputState = ePureAscii;
|
||||
mLastChar = '\0';
|
||||
}
|
||||
|
||||
nsUniversalDetector::~nsUniversalDetector() {
|
||||
delete mMultibyteProber;
|
||||
delete mEscCharSetProber;
|
||||
}
|
||||
|
||||
void nsUniversalDetector::Reset() {
|
||||
mDone = false;
|
||||
mBestGuess = -1; // illegal value as signal
|
||||
mInTag = false;
|
||||
|
||||
mStart = true;
|
||||
mDetectedCharset = nullptr;
|
||||
mGotData = false;
|
||||
mInputState = ePureAscii;
|
||||
mLastChar = '\0';
|
||||
|
||||
if (mMultibyteProber) {
|
||||
mMultibyteProber->Reset();
|
||||
}
|
||||
|
||||
if (mEscCharSetProber) {
|
||||
mEscCharSetProber->Reset();
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
#define SHORTCUT_THRESHOLD (float)0.95
|
||||
#define MINIMUM_THRESHOLD (float)0.20
|
||||
|
||||
nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen) {
|
||||
if (mDone) return NS_OK;
|
||||
|
||||
if (aLen > 0) mGotData = true;
|
||||
|
||||
// If the data starts with BOM, we know it is UTF
|
||||
if (mStart) {
|
||||
mStart = false;
|
||||
if (aLen >= 2) {
|
||||
switch (aBuf[0]) {
|
||||
case '\xEF':
|
||||
if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) {
|
||||
// EF BB BF UTF-8 encoded BOM
|
||||
mDetectedCharset = "UTF-8";
|
||||
}
|
||||
break;
|
||||
case '\xFE':
|
||||
if ('\xFF' == aBuf[1]) {
|
||||
// FE FF UTF-16, big endian BOM
|
||||
mDetectedCharset = "UTF-16BE";
|
||||
}
|
||||
break;
|
||||
case '\xFF':
|
||||
if ('\xFE' == aBuf[1]) {
|
||||
// FF FE UTF-16, little endian BOM
|
||||
mDetectedCharset = "UTF-16LE";
|
||||
}
|
||||
break;
|
||||
} // switch
|
||||
}
|
||||
|
||||
if (mDetectedCharset) {
|
||||
mDone = true;
|
||||
return NS_OK;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t i;
|
||||
for (i = 0; i < aLen; i++) {
|
||||
// other than 0xa0, if every othe character is ascii, the page is ascii
|
||||
if (aBuf[i] & '\x80' &&
|
||||
aBuf[i] != '\xA0') // Since many Ascii only page contains NBSP
|
||||
{
|
||||
// we got a non-ascii byte (high-byte)
|
||||
if (mInputState != eHighbyte) {
|
||||
// adjust state
|
||||
mInputState = eHighbyte;
|
||||
|
||||
// kill mEscCharSetProber if it is active
|
||||
if (mEscCharSetProber) {
|
||||
delete mEscCharSetProber;
|
||||
mEscCharSetProber = nullptr;
|
||||
}
|
||||
|
||||
// start multibyte charset prober
|
||||
if (!mMultibyteProber) {
|
||||
mMultibyteProber = new nsMBCSGroupProber();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// ok, just pure ascii so far
|
||||
if ((ePureAscii == mInputState) && (aBuf[i] == '\033')) {
|
||||
// found escape character
|
||||
mInputState = eEscAscii;
|
||||
}
|
||||
mLastChar = aBuf[i];
|
||||
}
|
||||
}
|
||||
|
||||
nsProbingState st;
|
||||
switch (mInputState) {
|
||||
case eEscAscii:
|
||||
if (nullptr == mEscCharSetProber) {
|
||||
mEscCharSetProber = new nsEscCharSetProber();
|
||||
if (nullptr == mEscCharSetProber) return NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
st = mEscCharSetProber->HandleData(aBuf, aLen);
|
||||
if (st == eFoundIt) {
|
||||
mDone = true;
|
||||
mDetectedCharset = mEscCharSetProber->GetCharSetName();
|
||||
}
|
||||
break;
|
||||
case eHighbyte:
|
||||
st = mMultibyteProber->HandleData(aBuf, aLen);
|
||||
if (st == eFoundIt) {
|
||||
mDone = true;
|
||||
mDetectedCharset = mMultibyteProber->GetCharSetName();
|
||||
return NS_OK;
|
||||
}
|
||||
break;
|
||||
|
||||
default: // pure ascii
|
||||
; // do nothing here
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
void nsUniversalDetector::DataEnd() {
|
||||
if (!mGotData) {
|
||||
// we haven't got any data yet, return immediately
|
||||
// caller program sometimes call DataEnd before anything has been sent to
|
||||
// detector
|
||||
return;
|
||||
}
|
||||
|
||||
if (mDetectedCharset) {
|
||||
mDone = true;
|
||||
Report(mDetectedCharset);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (mInputState) {
|
||||
case eHighbyte: {
|
||||
// do not report anything because we are not confident of it, that's in
|
||||
// fact a negative answer
|
||||
if (mMultibyteProber->GetConfidence() > MINIMUM_THRESHOLD)
|
||||
Report(mMultibyteProber->GetCharSetName());
|
||||
} break;
|
||||
case eEscAscii:
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
}
|
|
@ -1,37 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsUniversalDetector_h__
|
||||
#define nsUniversalDetector_h__
|
||||
|
||||
class nsCharSetProber;
|
||||
|
||||
typedef enum { ePureAscii = 0, eEscAscii = 1, eHighbyte = 2 } nsInputState;
|
||||
|
||||
class nsUniversalDetector {
|
||||
public:
|
||||
nsUniversalDetector();
|
||||
virtual ~nsUniversalDetector();
|
||||
virtual nsresult HandleData(const char* aBuf, uint32_t aLen);
|
||||
virtual void DataEnd(void);
|
||||
|
||||
protected:
|
||||
virtual void Report(const char* aCharset) = 0;
|
||||
virtual void Reset();
|
||||
nsInputState mInputState;
|
||||
bool mDone;
|
||||
bool mInTag;
|
||||
bool mStart;
|
||||
bool mGotData;
|
||||
char mLastChar;
|
||||
const char* mDetectedCharset;
|
||||
int32_t mBestGuess;
|
||||
uint32_t mLanguageFilter;
|
||||
|
||||
nsCharSetProber* mMultibyteProber;
|
||||
nsCharSetProber* mEscCharSetProber;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,8 +0,0 @@
|
|||
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
||||
# vim: set filetype=python:
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
DIRS += ['base', 'xpcom']
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
||||
# vim: set filetype=python:
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
UNIFIED_SOURCES += [
|
||||
'nsUdetXPCOMWrapper.cpp',
|
||||
]
|
||||
|
||||
FINAL_LIBRARY = 'xul'
|
||||
|
||||
LOCAL_INCLUDES += [
|
||||
'../base',
|
||||
]
|
|
@ -1,75 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
#include "nsUniversalDetector.h"
|
||||
#include "nsUdetXPCOMWrapper.h"
|
||||
#include "nsCharSetProber.h" // for DumpStatus
|
||||
|
||||
#include "nsUniversalCharDetDll.h"
|
||||
//---- for XPCOM
|
||||
#include "nsIFactory.h"
|
||||
#include "nsISupports.h"
|
||||
#include "nsCOMPtr.h"
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
nsXPCOMDetector::nsXPCOMDetector() : nsUniversalDetector() {}
|
||||
//---------------------------------------------------------------------
|
||||
nsXPCOMDetector::~nsXPCOMDetector() {}
|
||||
//---------------------------------------------------------------------
|
||||
|
||||
NS_IMPL_ISUPPORTS(nsXPCOMDetector, nsICharsetDetector)
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
NS_IMETHODIMP nsXPCOMDetector::Init(nsICharsetDetectionObserver* aObserver) {
|
||||
NS_ASSERTION(mObserver == nullptr, "Init twice");
|
||||
if (nullptr == aObserver) return NS_ERROR_ILLEGAL_VALUE;
|
||||
|
||||
mObserver = aObserver;
|
||||
return NS_OK;
|
||||
}
|
||||
//----------------------------------------------------------
|
||||
NS_IMETHODIMP nsXPCOMDetector::DoIt(const char* aBuf, uint32_t aLen,
|
||||
bool* oDontFeedMe) {
|
||||
NS_ASSERTION(mObserver != nullptr, "have not init yet");
|
||||
|
||||
if ((nullptr == aBuf) || (nullptr == oDontFeedMe))
|
||||
return NS_ERROR_ILLEGAL_VALUE;
|
||||
|
||||
this->Reset();
|
||||
nsresult rv = this->HandleData(aBuf, aLen);
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
|
||||
if (mDone) {
|
||||
if (mDetectedCharset) Report(mDetectedCharset);
|
||||
|
||||
*oDontFeedMe = true;
|
||||
}
|
||||
*oDontFeedMe = false;
|
||||
return NS_OK;
|
||||
}
|
||||
//----------------------------------------------------------
|
||||
NS_IMETHODIMP nsXPCOMDetector::Done() {
|
||||
NS_ASSERTION(mObserver != nullptr, "have not init yet");
|
||||
#ifdef DEBUG_chardet
|
||||
for (int32_t i = 0; i < NUM_OF_CHARSET_PROBERS; i++) {
|
||||
// If no data was received the array might stay filled with nulls
|
||||
// the way it was initialized in the constructor.
|
||||
if (mCharSetProbers[i]) mCharSetProbers[i]->DumpStatus();
|
||||
}
|
||||
#endif
|
||||
|
||||
this->DataEnd();
|
||||
return NS_OK;
|
||||
}
|
||||
//----------------------------------------------------------
|
||||
void nsXPCOMDetector::Report(const char* aCharset) {
|
||||
NS_ASSERTION(mObserver != nullptr, "have not init yet");
|
||||
#ifdef DEBUG_chardet
|
||||
printf("Universal Charset Detector report charset %s . \r\n", aCharset);
|
||||
#endif
|
||||
mObserver->Notify(aCharset, eBestAnswer);
|
||||
}
|
|
@ -1,40 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef _nsUdetXPCOMWrapper_h__
|
||||
#define _nsUdetXPCOMWrapper_h__
|
||||
#include "nsISupports.h"
|
||||
#include "nsICharsetDetector.h"
|
||||
#include "nsIStringCharsetDetector.h"
|
||||
#include "nsICharsetDetectionObserver.h"
|
||||
#include "nsCOMPtr.h"
|
||||
#include "nsIFactory.h"
|
||||
#include "nsUniversalDetector.h"
|
||||
|
||||
//=====================================================================
|
||||
class nsXPCOMDetector : public nsUniversalDetector, public nsICharsetDetector {
|
||||
NS_DECL_ISUPPORTS
|
||||
public:
|
||||
nsXPCOMDetector();
|
||||
NS_IMETHOD Init(nsICharsetDetectionObserver* aObserver) override;
|
||||
NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen, bool* oDontFeedMe) override;
|
||||
NS_IMETHOD Done() override;
|
||||
|
||||
protected:
|
||||
virtual ~nsXPCOMDetector();
|
||||
virtual void Report(const char* aCharset) override;
|
||||
|
||||
private:
|
||||
nsCOMPtr<nsICharsetDetectionObserver> mObserver;
|
||||
};
|
||||
|
||||
//=====================================================================
|
||||
|
||||
class nsJAPSMDetector final : public nsXPCOMDetector {
|
||||
public:
|
||||
nsJAPSMDetector() : nsXPCOMDetector() {}
|
||||
};
|
||||
|
||||
#endif //_nsUdetXPCOMWrapper_h__
|
|
@ -1,11 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsCharDetDll_h__
|
||||
#define nsCharDetDll_h__
|
||||
|
||||
#include "prtypes.h"
|
||||
|
||||
#endif /* nsCharDetDll_h__ */
|
|
@ -252,6 +252,14 @@ class Encoding final {
|
|||
return encoding_is_ascii_compatible(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether this is a Japanese legacy encoding.
|
||||
*/
|
||||
inline bool IsJapaneseLegacy() const {
|
||||
return this == SHIFT_JIS_ENCODING || this == EUC_JP_ENCODING ||
|
||||
this == ISO_2022_JP_ENCODING;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the _output encoding_ of this encoding. This is UTF-8 for
|
||||
* UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise.
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// Mostly copied and pasted from
|
||||
// third_party/rust/shift_or_euc/src/lib.rs , so
|
||||
// "top-level directory of this distribution" above refers to
|
||||
// third_party/rust/shift_or_euc/
|
||||
|
||||
#ifndef mozilla_JapaneseDetector_h
|
||||
#define mozilla_JapaneseDetector_h
|
||||
|
||||
#include "mozilla/Encoding.h"
|
||||
|
||||
namespace mozilla {
|
||||
class JapaneseDetector;
|
||||
}; // namespace mozilla
|
||||
|
||||
#define SHIFT_OR_EUC_DETECTOR mozilla::JapaneseDetector
|
||||
|
||||
#include "shift_or_euc.h"
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
/**
|
||||
* A Japanese legacy encoding detector for detecting between Shift_JIS,
|
||||
* EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the
|
||||
* encoding is one of those.
|
||||
*
|
||||
* # Principle of Operation
|
||||
*
|
||||
* The detector is based on two observations:
|
||||
*
|
||||
* 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
|
||||
* EUC-JP, so encountering such an escape sequence (before non-ASCII has been
|
||||
* encountered) can be taken as indication of ISO-2022-JP.
|
||||
* 2. When normal (full-with) kana or common kanji encoded as Shift_JIS is
|
||||
* decoded as EUC-JP, or vice versa, the result is either an error or
|
||||
* half-width katakana, and it's very uncommon for Japanese HTML to have
|
||||
* half-width katakana character before a normal kana or common kanji
|
||||
* character. Therefore, if decoding as Shift_JIS results in error or
|
||||
* have-width katakana, the detector decides that the content is EUC-JP, and
|
||||
* vice versa.
|
||||
*
|
||||
* # Failure Modes
|
||||
*
|
||||
* The detector gives the wrong answer if the text has a half-width katakana
|
||||
* character before normal kana or common kanji. Some uncommon kanji are
|
||||
* undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
|
||||
*
|
||||
* The half-width katakana issue is mainly relevant for old 8-bit JIS X
|
||||
* 0201-only text files that would decode correctly as Shift_JIS but that the
|
||||
* detector detects as EUC-JP.
|
||||
*
|
||||
* The undecidable kanji issue does not realistically show up when a full
|
||||
* document is fed to the detector, because, realistically, in a full
|
||||
* document, there is at least one kana or common kanji. It can occur,
|
||||
* though, if the detector is only run on a prefix of a document and the
|
||||
* prefix only contains the title of the document. It is possible for
|
||||
* document title to consist entirely of undecidable kanji. (Indeed,
|
||||
* Japanese Wikipedia has articles with such titles.) If the detector is
|
||||
* undecided, a fallback to Shift_JIS should be used.
|
||||
*/
|
||||
class JapaneseDetector final {
|
||||
public:
|
||||
~JapaneseDetector() {}
|
||||
|
||||
static void operator delete(void* aDetector) {
|
||||
shift_or_euc_detector_free(reinterpret_cast<JapaneseDetector*>(aDetector));
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates the detector. If `aAllow2022` is `true` the possible
|
||||
* guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
* `aAllow2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
* and undecided.
|
||||
*/
|
||||
static inline UniquePtr<JapaneseDetector> Create(bool aAllow2022) {
|
||||
UniquePtr<JapaneseDetector> detector(shift_or_euc_detector_new(aAllow2022));
|
||||
return detector;
|
||||
}
|
||||
|
||||
/**
|
||||
* Feeds bytes to the detector. If `aLast` is `true` the end of the stream
|
||||
* is considered to occur immediately after the end of `aBuffer`.
|
||||
* Otherwise, the stream is expected to continue. `aBuffer` may be empty.
|
||||
*
|
||||
* If you're running the detector only on a prefix of a complete
|
||||
* document, _do not_ pass `aLast` as `true` after the prefix if the
|
||||
* stream as a whole still contains more content.
|
||||
*
|
||||
* Returns `SHIFT_JIS_ENCODING` if the detector guessed
|
||||
* Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
|
||||
* guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
|
||||
* detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
* `aAllow2022` when instantiating the detector). Returns `nullptr` if the
|
||||
* detector is undecided. If `nullptr` is returned even when passing `true`
|
||||
* as `aLast`, falling back to Shift_JIS is the best guess for Web
|
||||
* purposes.
|
||||
*
|
||||
* Do not call again after the method has returned non-`nullptr` or after
|
||||
* the method has been called with `true` as `aLast`. (Asserts if the
|
||||
* previous sentence isn't adhered to.)
|
||||
*/
|
||||
inline const mozilla::Encoding* Feed(Span<const uint8_t> aBuffer,
|
||||
bool aLast) {
|
||||
return shift_or_euc_detector_feed(this, aBuffer.Elements(),
|
||||
aBuffer.Length(), aLast);
|
||||
}
|
||||
|
||||
private:
|
||||
JapaneseDetector() = delete;
|
||||
JapaneseDetector(const JapaneseDetector&) = delete;
|
||||
JapaneseDetector& operator=(const JapaneseDetector&) = delete;
|
||||
};
|
||||
|
||||
}; // namespace mozilla
|
||||
|
||||
#endif // mozilla_JapaneseDetector_h
|
|
@ -26,11 +26,13 @@ DIRS += [
|
|||
|
||||
EXPORTS.mozilla += [
|
||||
'Encoding.h',
|
||||
'JapaneseDetector.h',
|
||||
]
|
||||
|
||||
EXPORTS += [
|
||||
'../third_party/rust/encoding_c/include/encoding_rs.h',
|
||||
'../third_party/rust/encoding_c/include/encoding_rs_statics.h',
|
||||
'../third_party/rust/shift_or_euc_c/include/shift_or_euc.h',
|
||||
]
|
||||
|
||||
with Files("**"):
|
||||
|
|
|
@ -3373,6 +3373,15 @@ VARCACHE_PREF(
|
|||
RelaxedAtomicBool, false
|
||||
)
|
||||
|
||||
// Whether ISO-2022-JP is a permitted content-based encoding detection
|
||||
// outcome.
|
||||
VARCACHE_PREF(
|
||||
Live,
|
||||
"intl.charset.detector.iso2022jp.allowed",
|
||||
intl_charset_detector_iso2022jp_allowed,
|
||||
bool, true
|
||||
)
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
// Prefs starting with "layers."
|
||||
//---------------------------------------------------------------------------
|
||||
|
|
|
@ -95,8 +95,6 @@ FINAL_LIBRARY = 'xul'
|
|||
|
||||
LOCAL_INCLUDES += [
|
||||
'/dom/base',
|
||||
'/extensions/universalchardet/src/base',
|
||||
'/extensions/universalchardet/src/xpcom',
|
||||
'/intl/chardet',
|
||||
]
|
||||
|
||||
|
|
|
@ -31,7 +31,6 @@
|
|||
#include "nsIThreadRetargetableRequest.h"
|
||||
#include "nsPrintfCString.h"
|
||||
#include "nsNetUtil.h"
|
||||
#include "nsUdetXPCOMWrapper.h"
|
||||
#include "nsXULAppAPI.h"
|
||||
#include "mozilla/SchedulerGroup.h"
|
||||
#include "nsJSEnvironment.h"
|
||||
|
@ -156,7 +155,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
|
|||
mBomState(eBomState::BOM_SNIFFING_NOT_STARTED),
|
||||
mCharsetSource(kCharsetUninitialized),
|
||||
mEncoding(WINDOWS_1252_ENCODING),
|
||||
mFeedChardetIfEncoding(nullptr),
|
||||
mFeedChardet(true),
|
||||
mReparseForbidden(false),
|
||||
mLastBuffer(nullptr), // Will be filled when starting
|
||||
mExecutor(aExecutor),
|
||||
|
@ -181,6 +180,8 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
|
|||
mEventTarget(nsHtml5Module::GetStreamParserThread()->SerialEventTarget()),
|
||||
mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)),
|
||||
mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)),
|
||||
mJapaneseDetector(mozilla::JapaneseDetector::Create(
|
||||
StaticPrefs::intl_charset_detector_iso2022jp_allowed())),
|
||||
mInitialEncodingWasFromParentFrame(false),
|
||||
mHasHadErrors(false),
|
||||
mDecodingLocalFileAsUTF8(false),
|
||||
|
@ -210,16 +211,11 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
|
|||
nsAutoCString detectorName;
|
||||
Preferences::GetLocalizedCString("intl.charset.detector", detectorName);
|
||||
if (!detectorName.IsEmpty()) {
|
||||
// We recognize one of the three magic strings for the following languages.
|
||||
// We recognize one of the two magic strings for Russian and Ukranian.
|
||||
if (detectorName.EqualsLiteral("ruprob")) {
|
||||
mChardet = new nsRUProbDetector();
|
||||
mFeedChardetIfEncoding = WINDOWS_1251_ENCODING;
|
||||
} else if (detectorName.EqualsLiteral("ukprob")) {
|
||||
mChardet = new nsUKProbDetector();
|
||||
mFeedChardetIfEncoding = WINDOWS_1251_ENCODING;
|
||||
} else if (detectorName.EqualsLiteral("ja_parallel_state_machine")) {
|
||||
mChardet = new nsJAPSMDetector();
|
||||
mFeedChardetIfEncoding = SHIFT_JIS_ENCODING;
|
||||
}
|
||||
if (mChardet) {
|
||||
(void)mChardet->Init(this);
|
||||
|
@ -263,7 +259,7 @@ NS_IMETHODIMP
|
|||
nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
if (aConf == eBestAnswer || aConf == eSureAnswer) {
|
||||
mFeedChardetIfEncoding = nullptr; // just in case
|
||||
mFeedChardet = false; // just in case
|
||||
auto encoding =
|
||||
Encoding::ForLabelNoReplacement(nsDependentCString(aCharset));
|
||||
if (!encoding) {
|
||||
|
@ -271,8 +267,8 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
|
|||
}
|
||||
if (HasDecoder()) {
|
||||
if (mEncoding == encoding) {
|
||||
NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
|
||||
"Why are we running chardet at all?");
|
||||
MOZ_ASSERT(mCharsetSource < kCharsetFromAutoDetection,
|
||||
"Why are we running chardet at all?");
|
||||
mCharsetSource = kCharsetFromAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else {
|
||||
|
@ -294,6 +290,62 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
void nsHtml5StreamParser::FeedJapaneseDetector(Span<const uint8_t> aBuffer,
|
||||
bool aLast) {
|
||||
const Encoding* detected = mJapaneseDetector->Feed(aBuffer, aLast);
|
||||
if (!detected) {
|
||||
return;
|
||||
}
|
||||
mFeedChardet = false;
|
||||
if (mDecodingLocalFileAsUTF8 && detected != ISO_2022_JP_ENCODING) {
|
||||
return;
|
||||
}
|
||||
int32_t source = kCharsetFromAutoDetection;
|
||||
if (mCharsetSource == kCharsetFromParentForced ||
|
||||
mCharsetSource == kCharsetFromUserForced) {
|
||||
source = kCharsetFromUserForcedAutoDetection;
|
||||
}
|
||||
if (detected == mEncoding) {
|
||||
MOZ_ASSERT(mCharsetSource < source, "Why are we running chardet at all?");
|
||||
mCharsetSource = source;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else if (HasDecoder()) {
|
||||
// We've already committed to a decoder. Request a reload from the
|
||||
// docshell.
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(detected), source, 0);
|
||||
FlushTreeOpsAndDisarmTimer();
|
||||
Interrupt();
|
||||
} else {
|
||||
// Got a confident answer from the sniffing buffer. That code will
|
||||
// take care of setting up the decoder.
|
||||
mEncoding = WrapNotNull(detected);
|
||||
mCharsetSource = source;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
}
|
||||
}
|
||||
|
||||
void nsHtml5StreamParser::FeedDetector(Span<const uint8_t> aBuffer,
|
||||
bool aLast) {
|
||||
if (mEncoding->IsJapaneseLegacy()) {
|
||||
FeedJapaneseDetector(aBuffer, aLast);
|
||||
} else if (mEncoding == WINDOWS_1251_ENCODING && mChardet &&
|
||||
!mDecodingLocalFileAsUTF8) {
|
||||
if (!aBuffer.IsEmpty()) {
|
||||
bool dontFeed = false;
|
||||
mozilla::Unused << mChardet->DoIt((const char*)aBuffer.Elements(),
|
||||
aBuffer.Length(), &dontFeed);
|
||||
if (dontFeed) {
|
||||
mFeedChardet = false;
|
||||
}
|
||||
}
|
||||
if (aLast) {
|
||||
mozilla::Unused << mChardet->Done();
|
||||
}
|
||||
} else {
|
||||
mFeedChardet = false;
|
||||
}
|
||||
}
|
||||
|
||||
void nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) {
|
||||
if (recordreplay::IsRecordingOrReplaying()) {
|
||||
nsAutoCString spec;
|
||||
|
@ -335,6 +387,11 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
|||
MOZ_ASSERT(mEncoding != UTF_8_ENCODING);
|
||||
mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
|
||||
} else {
|
||||
if (mCharsetSource >= kCharsetFromAutoDetection &&
|
||||
!(mCharsetSource == kCharsetFromUserForced ||
|
||||
mCharsetSource == kCharsetFromParentForced)) {
|
||||
mFeedChardet = false;
|
||||
}
|
||||
mDecodingLocalFileAsUTF8 = false;
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
|
||||
}
|
||||
|
@ -354,7 +411,7 @@ nsresult nsHtml5StreamParser::SetupDecodingFromBom(
|
|||
mDecodingLocalFileAsUTF8 = false;
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling();
|
||||
mCharsetSource = kCharsetFromByteOrderMark;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mSniffingBuffer = nullptr;
|
||||
mMetaScanner = nullptr;
|
||||
|
@ -412,7 +469,7 @@ void nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(
|
|||
}
|
||||
mCharsetSource = kCharsetFromIrreversibleAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0);
|
||||
}
|
||||
|
||||
|
@ -479,12 +536,38 @@ static void HandleProcessingInstruction(void* aUserData,
|
|||
XML_StopParser(ud->mExpat, false);
|
||||
}
|
||||
|
||||
void nsHtml5StreamParser::FinalizeSniffingWithDetector(
|
||||
Span<const uint8_t> aFromSegment, uint32_t aCountToSniffingLimit,
|
||||
bool aEof) {
|
||||
if (mSniffingBuffer) {
|
||||
FeedDetector(MakeSpan(mSniffingBuffer.get(), mSniffingLength), false);
|
||||
}
|
||||
if (mFeedChardet && !aFromSegment.IsEmpty()) {
|
||||
// Avoid buffer boundary-dependent behavior when
|
||||
// reparsing is forbidden. If reparse is forbidden,
|
||||
// act as if we only saw the first 1024 bytes.
|
||||
// When reparsing isn't forbidden, buffer boundaries
|
||||
// can have an effect on whether the page is loaded
|
||||
// once or twice. :-(
|
||||
FeedDetector(mReparseForbidden ? aFromSegment.To(aCountToSniffingLimit)
|
||||
: aFromSegment,
|
||||
false);
|
||||
}
|
||||
if (mFeedChardet && aEof &&
|
||||
(!mReparseForbidden || aCountToSniffingLimit == aFromSegment.Length())) {
|
||||
// Don't signal EOF if reparse is forbidden and we didn't pass all input
|
||||
// to the detector above.
|
||||
mFeedChardet = false;
|
||||
FeedDetector(Span<const uint8_t>(), true);
|
||||
}
|
||||
}
|
||||
|
||||
nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
||||
uint32_t aCountToSniffingLimit,
|
||||
bool aEof) {
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
NS_ASSERTION(mCharsetSource < kCharsetFromParentForced,
|
||||
"Should not finalize sniffing when using forced charset.");
|
||||
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
|
||||
MOZ_ASSERT(mCharsetSource < kCharsetFromUserForcedAutoDetection,
|
||||
"Should not finalize sniffing with strong decision already made.");
|
||||
if (mMode == VIEW_SOURCE_XML) {
|
||||
static const XML_Memory_Handling_Suite memsuite = {
|
||||
(void* (*)(size_t))moz_xmalloc, (void* (*)(void*, size_t))moz_xrealloc,
|
||||
|
@ -547,50 +630,15 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
|||
}
|
||||
|
||||
// meta scan failed.
|
||||
if (mCharsetSource >= kCharsetFromHintPrevDoc) {
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
if (mCharsetSource < kCharsetFromMetaPrescan) {
|
||||
// Check for BOMless UTF-16 with Basic
|
||||
// Latin content for compat with IE. See bug 631751.
|
||||
SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit));
|
||||
}
|
||||
// Check for BOMless UTF-16 with Basic
|
||||
// Latin content for compat with IE. See bug 631751.
|
||||
SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit));
|
||||
// the charset may have been set now
|
||||
// maybe try chardet now;
|
||||
if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
|
||||
bool dontFeed;
|
||||
nsresult rv;
|
||||
if (mSniffingBuffer) {
|
||||
rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength,
|
||||
&dontFeed);
|
||||
if (dontFeed) {
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
}
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
if ((mFeedChardetIfEncoding == mEncoding) && !aFromSegment.IsEmpty()) {
|
||||
rv = mChardet->DoIt(
|
||||
(const char*)aFromSegment.Elements(),
|
||||
// Avoid buffer boundary-dependent behavior when
|
||||
// reparsing is forbidden. If reparse is forbidden,
|
||||
// act as if we only saw the first 1024 bytes.
|
||||
// When reparsing isn't forbidden, buffer boundaries
|
||||
// can have an effect on whether the page is loaded
|
||||
// once or twice. :-(
|
||||
mReparseForbidden ? aCountToSniffingLimit : aFromSegment.Length(),
|
||||
&dontFeed);
|
||||
if (dontFeed) {
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
}
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
if ((mFeedChardetIfEncoding == mEncoding) && (aEof || mReparseForbidden)) {
|
||||
// mReparseForbidden is checked so that we get to use the sniffing
|
||||
// buffer with the best guess so far if we aren't allowed to guess
|
||||
// better later.
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
rv = mChardet->Done();
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
}
|
||||
if (mFeedChardet) {
|
||||
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, aEof);
|
||||
// fall thru; callback may have changed charset
|
||||
}
|
||||
if (mCharsetSource == kCharsetUninitialized) {
|
||||
|
@ -600,7 +648,7 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
|
|||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else if (mMode == LOAD_AS_DATA && mCharsetSource == kCharsetFromFallback) {
|
||||
NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
|
||||
NS_ASSERTION(!mFeedChardetIfEncoding, "Should not feed chardet for XHR");
|
||||
NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
|
||||
NS_ASSERTION(mEncoding == UTF_8_ENCODING, "XHR should default to UTF-8");
|
||||
// Now mark charset source as non-weak to signal that we have a decision
|
||||
mCharsetSource = kCharsetFromDocTypeDefault;
|
||||
|
@ -687,7 +735,6 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
// earlier call to SetDocumentCharset(), since we didn't find a BOM and
|
||||
// overwrite mEncoding. (Note that if the user has overridden the charset,
|
||||
// we don't come here but check <meta> for XSS-dangerous charsets first.)
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
|
@ -719,12 +766,16 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
(encoding->IsAsciiCompatible() ||
|
||||
encoding == ISO_2022_JP_ENCODING)) {
|
||||
// Honor override
|
||||
if (mEncoding->IsJapaneseLegacy()) {
|
||||
mFeedChardet = true;
|
||||
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
|
||||
false);
|
||||
}
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment);
|
||||
}
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharsetSource = kCharsetFromMetaPrescan;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment);
|
||||
|
@ -733,6 +784,10 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
if (mCharsetSource == kCharsetFromParentForced ||
|
||||
mCharsetSource == kCharsetFromUserForced) {
|
||||
// meta not found, honor override
|
||||
if (mEncoding->IsJapaneseLegacy()) {
|
||||
mFeedChardet = true;
|
||||
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit, false);
|
||||
}
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
return FinalizeSniffing(aFromSegment, countToSniffingLimit, false);
|
||||
|
@ -761,7 +816,6 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
|
|||
}
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharsetSource = kCharsetFromMetaPrescan;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
|
||||
}
|
||||
|
@ -841,6 +895,12 @@ void nsHtml5StreamParser::ReDecodeLocalFile() {
|
|||
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
|
||||
mHasHadErrors = false;
|
||||
|
||||
// We need the detector to start with fresh state.
|
||||
// Turn off ISO-2022-JP detection, because if this doc was
|
||||
// ISO-2022-JP, it would have already been detected.
|
||||
mJapaneseDetector = mozilla::JapaneseDetector::Create(false);
|
||||
mFeedChardet = true;
|
||||
|
||||
// Throw away previous decoded data
|
||||
mLastBuffer = mFirstBuffer;
|
||||
mLastBuffer->next = nullptr;
|
||||
|
@ -856,7 +916,7 @@ void nsHtml5StreamParser::ReDecodeLocalFile() {
|
|||
void nsHtml5StreamParser::CommitLocalFileToUTF8() {
|
||||
MOZ_ASSERT(mDecodingLocalFileAsUTF8);
|
||||
mDecodingLocalFileAsUTF8 = false;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mFeedChardet = false;
|
||||
mEncoding = UTF_8_ENCODING;
|
||||
mCharsetSource = kCharsetFromFileURLGuess;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
|
@ -1000,7 +1060,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
|
|||
// This is the old Gecko behavior but the HTML5 spec disagrees.
|
||||
// Don't reparse on POST.
|
||||
mReparseForbidden = true;
|
||||
mFeedChardetIfEncoding = nullptr; // can't restart anyway
|
||||
mFeedChardet = false; // can't restart anyway
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1031,8 +1091,10 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
|
|||
mInitialEncodingWasFromParentFrame = true;
|
||||
}
|
||||
|
||||
if (mCharsetSource >= kCharsetFromAutoDetection) {
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
if (mCharsetSource >= kCharsetFromAutoDetection &&
|
||||
!(mCharsetSource == kCharsetFromParentForced ||
|
||||
mCharsetSource == kCharsetFromUserForced)) {
|
||||
mFeedChardet = false;
|
||||
}
|
||||
|
||||
if (mCharsetSource < kCharsetFromUtf8OnlyMime) {
|
||||
|
@ -1041,11 +1103,11 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
// We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
|
||||
// a browsing context. In the latter case, there's no need to remove the
|
||||
// BOM manually here, because the UTF-8 decoder removes it.
|
||||
// We are loading JSON/WebVTT/etc. into a browsing context.
|
||||
// There's no need to remove the BOM manually here, because
|
||||
// the UTF-8 decoder removes it.
|
||||
mReparseForbidden = true;
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mFeedChardet = false;
|
||||
|
||||
// Instantiate the converter here to avoid BOM sniffing.
|
||||
mDecodingLocalFileAsUTF8 = false;
|
||||
|
@ -1085,8 +1147,9 @@ void nsHtml5StreamParser::DoStopRequest() {
|
|||
return;
|
||||
}
|
||||
}
|
||||
if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
|
||||
mChardet->Done();
|
||||
if (mFeedChardet) {
|
||||
mFeedChardet = false;
|
||||
FeedDetector(Span<uint8_t>(), true);
|
||||
}
|
||||
|
||||
MOZ_ASSERT(mUnicodeDecoder,
|
||||
|
@ -1246,13 +1309,8 @@ void nsHtml5StreamParser::DoDataAvailable(Span<const uint8_t> aBuffer) {
|
|||
|
||||
nsresult rv;
|
||||
if (HasDecoder()) {
|
||||
if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
|
||||
bool dontFeed;
|
||||
mChardet->DoIt((const char*)aBuffer.Elements(), aBuffer.Length(),
|
||||
&dontFeed);
|
||||
if (dontFeed) {
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
}
|
||||
if (mFeedChardet) {
|
||||
FeedDetector(aBuffer, false);
|
||||
}
|
||||
rv = WriteStreamBytes(aBuffer);
|
||||
} else {
|
||||
|
@ -1411,7 +1469,7 @@ const Encoding* nsHtml5StreamParser::PreferredForInternalEncodingDecl(
|
|||
}
|
||||
}
|
||||
mCharsetSource = kCharsetFromMetaTag; // become confident
|
||||
mFeedChardetIfEncoding = nullptr; // don't feed chardet when confident
|
||||
mFeedChardet = false; // don't feed chardet when confident
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -1450,7 +1508,7 @@ bool nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding) {
|
|||
|
||||
// Avoid having the chardet ask for another restart after this restart
|
||||
// request.
|
||||
mFeedChardetIfEncoding = nullptr;
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding), kCharsetFromMetaTag,
|
||||
mTokenizer->getLineNumber());
|
||||
FlushTreeOpsAndDisarmTimer();
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "nsICharsetDetectionObserver.h"
|
||||
#include "nsHtml5MetaScanner.h"
|
||||
#include "mozilla/Encoding.h"
|
||||
#include "mozilla/JapaneseDetector.h"
|
||||
#include "nsHtml5TreeOpExecutor.h"
|
||||
#include "nsHtml5OwningUTF16Buffer.h"
|
||||
#include "nsIInputStream.h"
|
||||
|
@ -148,6 +149,16 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
|
|||
|
||||
// Not from an external interface
|
||||
|
||||
/**
|
||||
* Pass a buffer to the JapaneseDetector.
|
||||
*/
|
||||
void FeedJapaneseDetector(mozilla::Span<const uint8_t> aBuffer, bool aLast);
|
||||
|
||||
/**
|
||||
* Pass a buffer to the Japanese or Cyrillic detector as appropriate.
|
||||
*/
|
||||
void FeedDetector(mozilla::Span<const uint8_t> aBuffer, bool aLast);
|
||||
|
||||
/**
|
||||
* Call this method once you've created a parser, and want to instruct it
|
||||
* about what charset to load
|
||||
|
@ -282,6 +293,12 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
|
|||
*/
|
||||
void SniffBOMlessUTF16BasicLatin(mozilla::Span<const uint8_t> aFromSegment);
|
||||
|
||||
/**
|
||||
* Write the start of the stream to detector.
|
||||
*/
|
||||
void FinalizeSniffingWithDetector(mozilla::Span<const uint8_t> aFromSegment,
|
||||
uint32_t aCountToSniffingLimit, bool aEof);
|
||||
|
||||
/**
|
||||
* <meta charset> scan failed. Try chardet if applicable. After this, the
|
||||
* the parser will have some encoding even if a last resolt fallback.
|
||||
|
@ -411,9 +428,9 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
|
|||
NotNull<const Encoding*> mEncoding;
|
||||
|
||||
/**
|
||||
* The character encoding that is the base expectation for detection.
|
||||
* Whether the Cyrillic or Japanese detector should still be fed.
|
||||
*/
|
||||
const Encoding* mFeedChardetIfEncoding;
|
||||
bool mFeedChardet;
|
||||
|
||||
/**
|
||||
* Whether reparse is forbidden
|
||||
|
@ -529,10 +546,15 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
|
|||
nsCOMPtr<nsIRunnable> mLoadFlusher;
|
||||
|
||||
/**
|
||||
* The chardet instance if chardet is enabled.
|
||||
* The Cyrillic detector if enabled.
|
||||
*/
|
||||
nsCOMPtr<nsICharsetDetector> mChardet;
|
||||
|
||||
/**
|
||||
* The Japanese detector.
|
||||
*/
|
||||
mozilla::UniquePtr<mozilla::JapaneseDetector> mJapaneseDetector;
|
||||
|
||||
/**
|
||||
* Whether the initial charset source was kCharsetFromParentFrame
|
||||
*/
|
||||
|
|
|
@ -14,14 +14,14 @@
|
|||
#define kCharsetFromCache 5
|
||||
#define kCharsetFromParentFrame 6
|
||||
#define kCharsetFromAutoDetection 7
|
||||
#define kCharsetFromHintPrevDoc 8
|
||||
#define kCharsetFromMetaPrescan 9 // this one and smaller: HTML5 Tentative
|
||||
#define kCharsetFromMetaTag 10 // this one and greater: HTML5 Confident
|
||||
#define kCharsetFromIrreversibleAutoDetection 11
|
||||
#define kCharsetFromChannel 12
|
||||
#define kCharsetFromOtherComponent 13
|
||||
#define kCharsetFromParentForced 14 // propagates to child frames
|
||||
#define kCharsetFromUserForced 15 // propagates to child frames
|
||||
#define kCharsetFromMetaPrescan 8 // this one and smaller: HTML5 Tentative
|
||||
#define kCharsetFromMetaTag 9 // this one and greater: HTML5 Confident
|
||||
#define kCharsetFromIrreversibleAutoDetection 10
|
||||
#define kCharsetFromChannel 11
|
||||
#define kCharsetFromOtherComponent 12
|
||||
#define kCharsetFromParentForced 13 // propagates to child frames
|
||||
#define kCharsetFromUserForced 14 // propagates to child frames
|
||||
#define kCharsetFromUserForcedAutoDetection 15
|
||||
#define kCharsetFromByteOrderMark 16
|
||||
#define kCharsetFromUtf8OnlyMime 17 // For JSON, WebVTT and such
|
||||
#define kCharsetFromBuiltIn 18 // resource: URLs
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -12,7 +12,7 @@
|
|||
|
||||
[package]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.16"
|
||||
version = "0.8.17"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "A Gecko-oriented implementation of the Encoding Standard"
|
||||
homepage = "https://docs.rs/encoding_rs/"
|
||||
|
@ -35,7 +35,7 @@ optional = true
|
|||
version = "1.0"
|
||||
optional = true
|
||||
[dev-dependencies.bincode]
|
||||
version = "0.8"
|
||||
version = "1.0"
|
||||
|
||||
[dev-dependencies.serde_derive]
|
||||
version = "1.0"
|
||||
|
|
|
@ -404,6 +404,10 @@ To regenerate the generated code:
|
|||
|
||||
## Release Notes
|
||||
|
||||
### 0.8.17
|
||||
|
||||
* Update `bincode` (dev dependency) version requirement to 1.0.
|
||||
|
||||
### 0.8.16
|
||||
|
||||
* Switch from the `simd` crate to `packed_simd`.
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
feature = "cargo-clippy",
|
||||
allow(doc_markdown, inline_always, new_ret_no_self)
|
||||
)]
|
||||
#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.16")]
|
||||
#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.17")]
|
||||
|
||||
//! encoding_rs is a Gecko-oriented Free Software / Open Source implementation
|
||||
//! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust.
|
||||
|
@ -5635,7 +5635,7 @@ mod tests {
|
|||
let deserialized: Demo = serde_json::from_str(&serialized).unwrap();
|
||||
assert_eq!(deserialized, demo);
|
||||
|
||||
let bincoded = bincode::serialize(&demo, bincode::Infinite).unwrap();
|
||||
let bincoded = bincode::serialize(&demo).unwrap();
|
||||
let debincoded: Demo = bincode::deserialize(&bincoded[..]).unwrap();
|
||||
assert_eq!(debincoded, demo);
|
||||
}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"f9f41b76ecbe257a312ab09ed1208189b8dc9952d12d17a216fe2846d1d471c8","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"b7148745a7ef59788e76fbe638d4b41c54dcaa1313a809f4630a020645f892a8","examples/detect.rs":"eb7239ccc802290ef24331db600ca1226198801dd86df86876b4b738ef4b8470","src/lib.rs":"f2a83db125d553af5c6fabae0487ef211aad62f2d93c4418dc510cbd425d472a"},"package":"f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"}
|
|
@ -0,0 +1,38 @@
|
|||
If you send a pull request / patch, please observe the following.
|
||||
|
||||
## Licensing
|
||||
|
||||
Since this crate is dual-licensed,
|
||||
[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
|
||||
is considered to apply in the sense of Contributions being automatically
|
||||
under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
|
||||
That is, by the act of offering a Contribution, you place your Contribution
|
||||
under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
|
||||
file. Please do not contribute if you aren't willing or allowed to license your
|
||||
contributions in this manner.
|
||||
|
||||
You are encouraged to dedicate test code that you contribute to the Public
|
||||
Domain using the CC0 dedication. If you contribute test code that is not
|
||||
dedicated to the Public Domain, please be sure not to put it in a part of
|
||||
source code that the comments designate as being dedicated to the Public
|
||||
Domain.
|
||||
|
||||
## Copyright Notices
|
||||
|
||||
If you require the addition of your copyright notice, it's up to you to edit in
|
||||
your notice as part of your Contribution. Not adding a copyright notice is
|
||||
taken as a waiver of copyright notice.
|
||||
|
||||
## Compatibility with Stable Rust
|
||||
|
||||
Please ensure that your Contribution compiles with the latest stable-channel
|
||||
rustc.
|
||||
|
||||
## rustfmt
|
||||
|
||||
The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
|
||||
use that version or avoid using `rustfmt` (so as not to reformat all the code).
|
||||
|
||||
## Unit tests
|
||||
|
||||
Please ensure that `cargo test` succeeds.
|
|
@ -0,0 +1,9 @@
|
|||
shift_or_euc is copyright 2018 Mozilla Foundation.
|
||||
|
||||
Licensed under the Apache License, Version 2.0
|
||||
<LICENSE-APACHE or
|
||||
https://www.apache.org/licenses/LICENSE-2.0> or the MIT
|
||||
license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
|
||||
at your option. All files in the project carrying such
|
||||
notice may not be copied, modified, or distributed except
|
||||
according to those terms.
|
|
@ -0,0 +1,30 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "shift_or_euc"
|
||||
version = "0.1.0"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "Detects among the Japanese legacy encodings"
|
||||
homepage = "https://docs.rs/shift_or_euc/"
|
||||
documentation = "https://docs.rs/shift_or_euc/"
|
||||
readme = "README.md"
|
||||
keywords = ["encoding", "web", "charset"]
|
||||
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
|
||||
license = "MIT/Apache-2.0"
|
||||
repository = "https://github.com/hsivonen/shift_or_euc"
|
||||
[dependencies.encoding_rs]
|
||||
version = "0.8.17"
|
||||
|
||||
[dependencies.memchr]
|
||||
version = "2.2.0"
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2018 Mozilla Foundation
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,73 @@
|
|||
# shift_or_euc
|
||||
|
||||
[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT)
|
||||
|
||||
A Japanese legacy encoding detector for detecting between Shift_JIS, EUC-JP,
|
||||
and, optionally, ISO-2022-JP _given_ the assumption that the encoding is one
|
||||
of those.
|
||||
|
||||
This detector is generally more accurate (but see below about the failure
|
||||
mode on half-width katakana) and decides much sooner than machine
|
||||
learning-based detectors. To decide EUC-JP, machine learning-based detectors
|
||||
try to gain confidence that the input looks like EUC-JP. To decide EUC-JP,
|
||||
this detector instead looks for two simple rule-based signs of the input not
|
||||
being Shift_JIS.
|
||||
|
||||
As a consequence of not containing machine learning tables, the binary size
|
||||
footprint that this crate adds on top of
|
||||
[`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny.
|
||||
|
||||
## Documentation
|
||||
|
||||
[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc)
|
||||
|
||||
## Licensing
|
||||
|
||||
See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
|
||||
|
||||
## Sample Program Usage
|
||||
|
||||
1. [Install Rust](https://rustup.rs/)
|
||||
2. `git clone https://github.com/hsivonen/shift_or_euc`
|
||||
3. `cd shift_or_euc`
|
||||
4. `cargo run --example detect PATH_TO_FILE`
|
||||
|
||||
The program prints one of:
|
||||
|
||||
* Shift_JIS
|
||||
* EUC-JP
|
||||
* ISO-2022-JP
|
||||
* Undecided
|
||||
|
||||
## Principle of Operation
|
||||
|
||||
The detector is based on two observations:
|
||||
|
||||
1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
|
||||
EUC-JP, so encountering such an escape sequence (before non-ASCII has been
|
||||
encountered) can be taken as indication of ISO-2022-JP.
|
||||
2. When normal (full-with) kana or common kanji encoded as Shift_JIS is
|
||||
decoded as EUC-JP, or vice versa, the result is either an error or half-width
|
||||
katakana, and it's very uncommon for Japanese HTML to have half-width katakana
|
||||
character before a normal kana or common kanji character. Therefore, if
|
||||
decoding as Shift_JIS results in error or have-width katakana, the detector
|
||||
decides that the content is EUC-JP, and vice versa.
|
||||
|
||||
## Failure Modes
|
||||
|
||||
The detector gives the wrong answer if the text has a half-width katakana
|
||||
character before normal kana or common kanji. Some uncommon kanji are
|
||||
undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
|
||||
|
||||
The half-width katakana issue is mainly relevant for old 8-bit JIS X 0201-only
|
||||
text files that would decode correctly as Shift_JIS but that the detector
|
||||
detects as EUC-JP.
|
||||
|
||||
The undecidable kanji issue does not realistically show up when a full
|
||||
document is fed to the detector, because, realistically, in a full document,
|
||||
there is at least one kana or common kanji. It can occur, though, if the
|
||||
detector is only run on a prefix of a document and the prefix only contains
|
||||
the title of the document. It is possible for document title to consist
|
||||
entirely of undecidable kanji. (Indeed, Japanese Wikipedia has articles with
|
||||
such titles.) If the detector is undecided, falling back to Shift_JIS is
|
||||
typically the Web oriented better guess.
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
use shift_or_euc::Detector;
|
||||
|
||||
fn main() {
|
||||
let mut args = std::env::args_os();
|
||||
if args.next().is_none() {
|
||||
eprintln!("Error: Program name missing from arguments.");
|
||||
std::process::exit(-1);
|
||||
}
|
||||
if let Some(path) = args.next() {
|
||||
if args.next().is_some() {
|
||||
eprintln!("Error: Too many arguments.");
|
||||
std::process::exit(-3);
|
||||
}
|
||||
if let Ok(mut file) = File::open(path) {
|
||||
let mut buffer = [0u8; 4096];
|
||||
let mut detector = Detector::new(true);
|
||||
loop {
|
||||
if let Ok(num_read) = file.read(&mut buffer[..]) {
|
||||
let opt_enc = if num_read == 0 {
|
||||
detector.feed(b"", true)
|
||||
} else {
|
||||
detector.feed(&buffer[..num_read], false)
|
||||
};
|
||||
if let Some(encoding) = opt_enc {
|
||||
println!("{}", encoding.name());
|
||||
return;
|
||||
} else if num_read == 0 {
|
||||
println!("Undecided");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
eprintln!("Error: Error reading file.");
|
||||
std::process::exit(-5);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("Error: Could not open file.");
|
||||
std::process::exit(-4);
|
||||
}
|
||||
} else {
|
||||
eprintln!("Error: One path argument needed.");
|
||||
std::process::exit(-2);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,278 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![doc(html_root_url = "https://docs.rs/shift_or_euc/0.1.0")]
|
||||
|
||||
//! A Japanese legacy encoding detector for detecting between Shift_JIS,
|
||||
//! EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the
|
||||
//! encoding is one of those.
|
||||
//!
|
||||
//! This detector is generally more accurate (but see below about the failure
|
||||
//! mode on half-width katakana) and decides much sooner than machine
|
||||
//! learning-based detectors. To decide EUC-JP, machine learning-based
|
||||
//! detectors try to gain confidence that the input looks like EUC-JP. To
|
||||
//! decide EUC-JP, this detector instead looks for two simple rule-based
|
||||
//! signs of the input not being Shift_JIS.
|
||||
//!
|
||||
//! As a consequence of not containing machine learning tables, the binary
|
||||
//! size footprint that this crate adds on top of
|
||||
//! [`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny.
|
||||
//!
|
||||
//! # Licensing
|
||||
//!
|
||||
//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
|
||||
//!
|
||||
//! # Principle of Operation
|
||||
//!
|
||||
//! The detector is based on two observations:
|
||||
//!
|
||||
//! 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
|
||||
//! EUC-JP, so encountering such an escape sequence (before non-ASCII has been
|
||||
//! encountered) can be taken as indication of ISO-2022-JP.
|
||||
//! 2. When normal (full-with) kana or common kanji encoded as Shift_JIS is
|
||||
//! decoded as EUC-JP, or vice versa, the result is either an error or
|
||||
//! half-width katakana, and it's very uncommon for Japanese HTML to have
|
||||
//! half-width katakana character before a normal kana or common kanji
|
||||
//! character. Therefore, if decoding as Shift_JIS results in error or
|
||||
//! have-width katakana, the detector decides that the content is EUC-JP, and
|
||||
//! vice versa.
|
||||
//!
|
||||
//! # Failure Modes
|
||||
//!
|
||||
//! The detector gives the wrong answer if the text has a half-width katakana
|
||||
//! character before normal kana or common kanji. Some uncommon kanji are
|
||||
//! undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
|
||||
//!
|
||||
//! The half-width katakana issue is mainly relevant for old 8-bit JIS X
|
||||
//! 0201-only text files that would decode correctly as Shift_JIS but that the
|
||||
//! detector detects as EUC-JP.
|
||||
//!
|
||||
//! The undecidable kanji issue does not realistically show up when a full
|
||||
//! document is fed to the detector, because, realistically, in a full
|
||||
//! document, there is at least one kana or common kanji. It can occur,
|
||||
//! though, if the detector is only run on a prefix of a document and the
|
||||
//! prefix only contains the title of the document. It is possible for
|
||||
//! document title to consist entirely of undecidable kanji. (Indeed,
|
||||
//! Japanese Wikipedia has articles with such titles.) If the detector is
|
||||
//! undecided, falling back to Shift_JIS is typically the Web oriented better
|
||||
//! guess.
|
||||
|
||||
use encoding_rs::Decoder;
|
||||
use encoding_rs::DecoderResult;
|
||||
use encoding_rs::Encoding;
|
||||
use encoding_rs::EUC_JP;
|
||||
use encoding_rs::ISO_2022_JP;
|
||||
use encoding_rs::SHIFT_JIS;
|
||||
|
||||
/// Returns the index of the first non-ASCII byte or the first
|
||||
/// 0x1B, whichever comes first, or the length of the buffer
|
||||
/// if neither is found.
|
||||
fn find_non_ascii_or_escape(buffer: &[u8]) -> usize {
|
||||
let ascii_up_to = Encoding::ascii_valid_up_to(buffer);
|
||||
if let Some(escape) = memchr::memchr(0x1B, &buffer[..ascii_up_to]) {
|
||||
escape
|
||||
} else {
|
||||
ascii_up_to
|
||||
}
|
||||
}
|
||||
|
||||
/// Feed decoder with one byte (if `last` is `false`) or EOF (if `last` is
|
||||
/// `true`). `byte` is ignored if `last` is `true`.
|
||||
/// Returns `true` if there was no rejection or `false` upon rejecting the
|
||||
/// encoding hypothesis represented by this decoder.
|
||||
#[inline(always)]
|
||||
fn feed_decoder(decoder: &mut Decoder, byte: u8, last: bool) -> bool {
|
||||
let mut output = [0u16; 1];
|
||||
let input = [byte];
|
||||
let (result, _read, written) = decoder.decode_to_utf16_without_replacement(
|
||||
if last { b"" } else { &input },
|
||||
&mut output,
|
||||
last,
|
||||
);
|
||||
match result {
|
||||
DecoderResult::InputEmpty => {
|
||||
if written == 1 {
|
||||
match output[0] {
|
||||
0xFF61...0xFF9F => {
|
||||
return false;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
DecoderResult::Malformed(_, _) => {
|
||||
return false;
|
||||
}
|
||||
DecoderResult::OutputFull => {
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// A detector for detecting the character encoding of input on the
|
||||
/// precondition that the encoding is a Japanese legacy encoding.
|
||||
pub struct Detector {
|
||||
shift_jis_decoder: Decoder,
|
||||
euc_jp_decoder: Decoder,
|
||||
second_byte_in_escape: u8,
|
||||
iso_2022_jp_disqualified: bool,
|
||||
escape_seen: bool,
|
||||
finished: bool,
|
||||
}
|
||||
|
||||
impl Detector {
|
||||
/// Instantiates the detector. If `allow_2022` is `true` the possible
|
||||
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
/// and undecided.
|
||||
pub fn new(allow_2022: bool) -> Self {
|
||||
Detector {
|
||||
shift_jis_decoder: SHIFT_JIS.new_decoder_without_bom_handling(),
|
||||
euc_jp_decoder: EUC_JP.new_decoder_without_bom_handling(),
|
||||
second_byte_in_escape: 0,
|
||||
iso_2022_jp_disqualified: !allow_2022,
|
||||
escape_seen: false,
|
||||
finished: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Feeds bytes to the detector. If `last` is `true` the end of the stream
|
||||
/// is considered to occur immediately after the end of `buffer`.
|
||||
/// Otherwise, the stream is expected to continue. `buffer` may be empty.
|
||||
///
|
||||
/// If you're running the detector only on a prefix of a complete
|
||||
/// document, _do not_ pass `last` as `true` after the prefix if the
|
||||
/// stream as a whole still contains more content.
|
||||
///
|
||||
/// Returns `Some(encoding_rs::SHIFT_JIS)` if the detector guessed
|
||||
/// Shift_JIS. Returns `Some(encoding_rs::EUC_JP)` if the detector
|
||||
/// guessed EUC-JP. Returns `Some(encoding_rs::ISO_2022_JP)` if the
|
||||
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
/// `allow_2022` when instantiating the detector). Returns `None` if the
|
||||
/// detector is undecided. If `None` is returned even when passing `true`
|
||||
/// as `last`, falling back to Shift_JIS is the best guess for Web
|
||||
/// purposes.
|
||||
///
|
||||
/// Do not call again after the method has returned `Some(_)` or after
|
||||
/// the method has been called with `true` as `last`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If called after the method has returned `Some(_)` or after the method
|
||||
/// has been called with `true` as `last`.
|
||||
pub fn feed(&mut self, buffer: &[u8], last: bool) -> Option<&'static Encoding> {
|
||||
assert!(
|
||||
!self.finished,
|
||||
"Tried to used a detector that has finished."
|
||||
);
|
||||
self.finished = true; // Will change back to false unless we return early
|
||||
let mut i = 0;
|
||||
if !self.iso_2022_jp_disqualified {
|
||||
if !self.escape_seen {
|
||||
i = find_non_ascii_or_escape(buffer);
|
||||
}
|
||||
while i < buffer.len() {
|
||||
let byte = buffer[i];
|
||||
if byte > 0x7F {
|
||||
self.iso_2022_jp_disqualified = true;
|
||||
break;
|
||||
}
|
||||
if !self.escape_seen && byte == 0x1B {
|
||||
self.escape_seen = true;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if self.escape_seen && self.second_byte_in_escape == 0 {
|
||||
self.second_byte_in_escape = byte;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
match (self.second_byte_in_escape, byte) {
|
||||
(0x28, 0x42) | (0x28, 0x4A) | (0x28, 0x49) | (0x24, 0x40) | (0x24, 0x42) => {
|
||||
return Some(ISO_2022_JP);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
if self.escape_seen {
|
||||
self.iso_2022_jp_disqualified = true;
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
for &byte in &buffer[i..] {
|
||||
if !feed_decoder(&mut self.euc_jp_decoder, byte, false) {
|
||||
return Some(SHIFT_JIS);
|
||||
}
|
||||
if !feed_decoder(&mut self.shift_jis_decoder, byte, false) {
|
||||
return Some(EUC_JP);
|
||||
}
|
||||
}
|
||||
if last {
|
||||
if !feed_decoder(&mut self.euc_jp_decoder, 0, true) {
|
||||
return Some(SHIFT_JIS);
|
||||
}
|
||||
if !feed_decoder(&mut self.shift_jis_decoder, 0, true) {
|
||||
return Some(EUC_JP);
|
||||
}
|
||||
return None;
|
||||
}
|
||||
self.finished = false;
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// Any copyright to the test code below this comment is dedicated to the
|
||||
// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_iso_2022_jp() {
|
||||
let mut detector = Detector::new(true);
|
||||
assert_eq!(
|
||||
detector.feed(b"abc\x1B\x28\x42\xFF", true),
|
||||
Some(ISO_2022_JP)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_error_precedence() {
|
||||
let mut detector = Detector::new(true);
|
||||
assert_eq!(detector.feed(b"abc\xFF", true), Some(SHIFT_JIS));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_euc_jp() {
|
||||
let mut detector = Detector::new(true);
|
||||
assert_eq!(detector.feed(b"abc\x81\x40", true), Some(SHIFT_JIS));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_shift_jis() {
|
||||
let mut detector = Detector::new(true);
|
||||
assert_eq!(detector.feed(b"abc\xEB\xA8", true), Some(EUC_JP));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_shift_jis_before_invalid_euc_jp() {
|
||||
let mut detector = Detector::new(true);
|
||||
assert_eq!(detector.feed(b"abc\xEB\xA8\x81\x40", true), Some(EUC_JP));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_undecided() {
|
||||
let mut detector = Detector::new(true);
|
||||
assert_eq!(detector.feed(b"abc", false), None);
|
||||
assert_eq!(detector.feed(b"abc", false), None);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"342e5345f4fb433b89f397b07e4e7162376b30cbbc1d6f6ccb11523116e6ed6b","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"a323f1f4537bc7b3f9b3b216c8ac5041b83aa0321f5349a52627aade947c6272","include/shift_or_euc.h":"47c3b9832cb7eb8995aa37dcc2e76be7d4f5c7b3fa6b43135e579831ab449cd8","src/lib.rs":"cab1898dd6724e0a0324a1e44f6348c107f13916da8873dba69c70dbc95ba9cd"},"package":"c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"}
|
|
@ -0,0 +1,38 @@
|
|||
If you send a pull request / patch, please observe the following.
|
||||
|
||||
## Licensing
|
||||
|
||||
Since this crate is dual-licensed,
|
||||
[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
|
||||
is considered to apply in the sense of Contributions being automatically
|
||||
under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
|
||||
That is, by the act of offering a Contribution, you place your Contribution
|
||||
under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
|
||||
file. Please do not contribute if you aren't willing or allowed to license your
|
||||
contributions in this manner.
|
||||
|
||||
You are encouraged to dedicate test code that you contribute to the Public
|
||||
Domain using the CC0 dedication. If you contribute test code that is not
|
||||
dedicated to the Public Domain, please be sure not to put it in a part of
|
||||
source code that the comments designate as being dedicated to the Public
|
||||
Domain.
|
||||
|
||||
## Copyright Notices
|
||||
|
||||
If you require the addition of your copyright notice, it's up to you to edit in
|
||||
your notice as part of your Contribution. Not adding a copyright notice is
|
||||
taken as a waiver of copyright notice.
|
||||
|
||||
## Compatibility with Stable Rust
|
||||
|
||||
Please ensure that your Contribution compiles with the latest stable-channel
|
||||
rustc.
|
||||
|
||||
## rustfmt
|
||||
|
||||
The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
|
||||
use that version or avoid using `rustfmt` (so as not to reformat all the code).
|
||||
|
||||
## Unit tests
|
||||
|
||||
Please ensure that `cargo test` succeeds.
|
|
@ -0,0 +1,9 @@
|
|||
shift_or_euc is copyright 2018 Mozilla Foundation.
|
||||
|
||||
Licensed under the Apache License, Version 2.0
|
||||
<LICENSE-APACHE or
|
||||
https://www.apache.org/licenses/LICENSE-2.0> or the MIT
|
||||
license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
|
||||
at your option. All files in the project carrying such
|
||||
notice may not be copied, modified, or distributed except
|
||||
according to those terms.
|
|
@ -0,0 +1,30 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "shift_or_euc_c"
|
||||
version = "0.1.0"
|
||||
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
||||
description = "C API for shift_or_euc"
|
||||
homepage = "https://docs.rs/shift_or_euc_c/"
|
||||
documentation = "https://docs.rs/shift_or_euc_c/"
|
||||
readme = "README.md"
|
||||
keywords = ["encoding", "web", "charset"]
|
||||
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
|
||||
license = "MIT/Apache-2.0"
|
||||
repository = "https://github.com/hsivonen/shift_or_euc_c"
|
||||
[dependencies.encoding_rs]
|
||||
version = "0.8.17"
|
||||
|
||||
[dependencies.shift_or_euc]
|
||||
version = "0.1.0"
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2018 Mozilla Foundation
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,13 @@
|
|||
# shift_or_euc_c
|
||||
|
||||
[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT)
|
||||
|
||||
C API for [`shift_or_euc`](https://docs.rs/crate/shift_or_euc).
|
||||
|
||||
## Documentation
|
||||
|
||||
[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc_c)
|
||||
|
||||
## Licensing
|
||||
|
||||
See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT).
|
|
@ -0,0 +1,88 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#ifndef shift_or_euc_h
|
||||
#define shift_or_euc_h
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include "encoding_rs.h"
|
||||
|
||||
#ifndef SHIFT_OR_EUC_DETECTOR
|
||||
#define SHIFT_OR_EUC_DETECTOR Detector
|
||||
#ifndef __cplusplus
|
||||
typedef struct Detector_ Detector;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/// Instantiates the detector. If `allow_2022` is `true` the possible
|
||||
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
/// and undecided.
|
||||
///
|
||||
/// The instantiated detector must be freed after use using
|
||||
/// `shift_or_euc_detector_free`.
|
||||
SHIFT_OR_EUC_DETECTOR* shift_or_euc_detector_new(bool allow_2022);
|
||||
|
||||
/// Deallocates a detector obtained from `shift_or_euc_detector_new`.
|
||||
void shift_or_euc_detector_free(SHIFT_OR_EUC_DETECTOR* detector);
|
||||
|
||||
/// Feeds bytes to the detector. If `last` is `true` the end of the stream
|
||||
/// is considered to occur immediately after the end of `buffer`.
|
||||
/// Otherwise, the stream is expected to continue. `buffer_len` may be zero.
|
||||
/// `buffer` must not be `NULL` but may be undereferencable when
|
||||
/// `buffer_len` is zero.
|
||||
///
|
||||
/// If you're running the detector only on a prefix of a complete
|
||||
/// document, _do not_ pass `last` as `true` after the prefix if the
|
||||
/// stream as a whole still contains more content.
|
||||
///
|
||||
/// Returns `SHIFT_JIS_ENCODING` if the detector guessed
|
||||
/// Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
|
||||
/// guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
|
||||
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
/// `allow_2022` when instantiating the detector). Returns `NULL` if the
|
||||
/// detector is undecided. If `NULL` is returned even when passing `true`
|
||||
/// as `last`, falling back to Shift_JIS is the best guess for Web
|
||||
/// purposes.
|
||||
///
|
||||
/// Do not call again after the function has returned non-`NULL` or after
|
||||
/// the function has been called with `true` as `last`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If called after the function has returned non-`NULL` or after the
|
||||
/// function has been called with `true` as `last`.
|
||||
///
|
||||
/// # Undefined Behavior
|
||||
///
|
||||
/// UB ensues if
|
||||
///
|
||||
/// * `detector` does not point to a detector obtained from
|
||||
/// `shift_or_euc_detector_new` but not yet freed with
|
||||
/// `shift_or_euc_detector_free`.
|
||||
/// * `buffer` is `NULL`.
|
||||
/// * `buffer` and `buffer_len` don't designate a range of memory
|
||||
/// valid for reading.
|
||||
ENCODING_RS_ENCODING const* shift_or_euc_detector_feed(
|
||||
SHIFT_OR_EUC_DETECTOR* detector,
|
||||
uint8_t const* buffer,
|
||||
size_t buffer_len,
|
||||
bool last
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // shift_or_euc_h
|
|
@ -0,0 +1,94 @@
|
|||
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![doc(html_root_url = "https://docs.rs/shift_or_euc_c/0.1.0")]
|
||||
|
||||
//! C API for [`shift_or_euc`](https://docs.rs/shift_or_euc/)
|
||||
//!
|
||||
//! # Panics
|
||||
//!
|
||||
//! This crate is designed to be used only in a `panic=abort` scenario.
|
||||
//! Panic propagation across FFI is not handled!
|
||||
//!
|
||||
//! # Licensing
|
||||
//!
|
||||
//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
|
||||
|
||||
use encoding_rs::Encoding;
|
||||
use shift_or_euc::*;
|
||||
|
||||
/// Instantiates the detector. If `allow_2022` is `true` the possible
|
||||
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
|
||||
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
|
||||
/// and undecided.
|
||||
///
|
||||
/// The instantiated detector must be freed after use using
|
||||
/// `shift_or_euc_detector_free`.
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn shift_or_euc_detector_new(allow_2022: bool) -> *mut Detector {
|
||||
Box::into_raw(Box::new(Detector::new(allow_2022)))
|
||||
}
|
||||
|
||||
/// Deallocates a detector obtained from `shift_or_euc_detector_new`.
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn shift_or_euc_detector_free(detector: *mut Detector) {
|
||||
let _ = Box::from_raw(detector);
|
||||
}
|
||||
|
||||
/// Feeds bytes to the detector. If `last` is `true` the end of the stream
|
||||
/// is considered to occur immediately after the end of `buffer`.
|
||||
/// Otherwise, the stream is expected to continue. `buffer_len` may be zero.
|
||||
/// `buffer` must not be `NULL` but may be undereferencable when
|
||||
/// `buffer_len` is zero.
|
||||
///
|
||||
/// If you're running the detector only on a prefix of a complete
|
||||
/// document, _do not_ pass `last` as `true` after the prefix if the
|
||||
/// stream as a whole still contains more content.
|
||||
///
|
||||
/// Returns `SHIFT_JIS_ENCODING` if the detector guessed
|
||||
/// Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
|
||||
/// guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
|
||||
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
|
||||
/// `allow_2022` when instantiating the detector). Returns `NULL` if the
|
||||
/// detector is undecided. If `NULL` is returned even when passing `true`
|
||||
/// as `last`, falling back to Shift_JIS is the best guess for Web
|
||||
/// purposes.
|
||||
///
|
||||
/// Do not call again after the function has returned non-`NULL` or after
|
||||
/// the function has been called with `true` as `last`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If called after the function has returned non-`NULL` or after the
|
||||
/// function has been called with `true` as `last`.
|
||||
///
|
||||
/// # Undefined Behavior
|
||||
///
|
||||
/// UB ensues if
|
||||
///
|
||||
/// * `detector` does not point to a detector obtained from
|
||||
/// `shift_or_euc_detector_new` but not yet freed with
|
||||
/// `shift_or_euc_detector_free`.
|
||||
/// * `buffer` is `NULL`.
|
||||
/// * `buffer` and `buffer_len` don't designate a range of memory
|
||||
/// valid for reading.
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn shift_or_euc_detector_feed(
|
||||
detector: *mut Detector,
|
||||
buffer: *const u8,
|
||||
buffer_len: usize,
|
||||
last: bool,
|
||||
) -> *const Encoding {
|
||||
if let Some(encoding) = (*detector).feed(::std::slice::from_raw_parts(buffer, buffer_len), last)
|
||||
{
|
||||
encoding
|
||||
} else {
|
||||
::std::ptr::null()
|
||||
}
|
||||
}
|
|
@ -245,6 +245,8 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) {
|
|||
|
||||
this._mayEnableCharacterEncodingMenu = null;
|
||||
|
||||
this._charsetAutodetected = false;
|
||||
|
||||
this._contentPrincipal = null;
|
||||
|
||||
this._csp = null;
|
||||
|
@ -619,6 +621,16 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) {
|
|||
}
|
||||
}
|
||||
|
||||
get charsetAutodetected() {
|
||||
return this.isRemoteBrowser ? this._charsetAutodetected : this.docShell.charsetAutodetected;
|
||||
}
|
||||
|
||||
set charsetAutodetected(aAutodetected) {
|
||||
if (this.isRemoteBrowser) {
|
||||
this._charsetAutodetected = aAutodetected;
|
||||
}
|
||||
}
|
||||
|
||||
get contentPrincipal() {
|
||||
return this.isRemoteBrowser ? this._contentPrincipal : this.contentDocument.nodePrincipal;
|
||||
}
|
||||
|
@ -1709,6 +1721,7 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) {
|
|||
"_contentTitle",
|
||||
"_characterSet",
|
||||
"_mayEnableCharacterEncodingMenu",
|
||||
"_charsetAutodetected",
|
||||
"_contentPrincipal",
|
||||
"_imageDocument",
|
||||
"_fullZoom",
|
||||
|
|
|
@ -38,6 +38,7 @@ cert_storage = { path = "../../../../security/manager/ssl/cert_storage", optiona
|
|||
bitsdownload = { path = "../../../components/bitsdownload", optional = true }
|
||||
storage = { path = "../../../../storage/rust" }
|
||||
bookmark_sync = { path = "../../../components/places/bookmark_sync", optional = true }
|
||||
shift_or_euc_c = "0.1.0"
|
||||
|
||||
[build-dependencies]
|
||||
rustc_version = "0.2"
|
||||
|
|
|
@ -44,6 +44,7 @@ extern crate bitsdownload;
|
|||
extern crate storage;
|
||||
#[cfg(feature = "moz_places")]
|
||||
extern crate bookmark_sync;
|
||||
extern crate shift_or_euc_c;
|
||||
|
||||
extern crate arrayvec;
|
||||
|
||||
|
|
|
@ -31,8 +31,6 @@ charsetMenuAutodet = Auto-Detect
|
|||
charsetMenuAutodet.key = D
|
||||
charsetMenuAutodet.off = (off)
|
||||
charsetMenuAutodet.off.key = o
|
||||
charsetMenuAutodet.ja = Japanese
|
||||
charsetMenuAutodet.ja.key = J
|
||||
charsetMenuAutodet.ru = Russian
|
||||
charsetMenuAutodet.ru.key = R
|
||||
charsetMenuAutodet.uk = Ukrainian
|
||||
|
@ -104,25 +102,8 @@ windows-1255 = Hebrew
|
|||
# sorts right after that one in the collation order for your locale.
|
||||
ISO-8859-8 = Hebrew, Visual
|
||||
|
||||
# Japanese
|
||||
Shift_JIS.key = J
|
||||
Shift_JIS = Japanese (Shift_JIS)
|
||||
EUC-JP.key = p
|
||||
EUC-JP = Japanese (EUC-JP)
|
||||
ISO-2022-JP.key = n
|
||||
ISO-2022-JP = Japanese (ISO-2022-JP)
|
||||
|
||||
# UI string in anticipation of bug 1543077; deliberately not in use yet
|
||||
|
||||
# LOCALIZATION NOTE (Japanese.key): If taken into use, this string will appear
|
||||
# instead of the string for Shift_JIS.key, so the use of the same
|
||||
# accelerator is deliberate.
|
||||
# Japanese (NOT AN ENCODING NAME)
|
||||
Japanese.key = J
|
||||
# LOCALIZATION NOTE (Japanese): If taken into use, this string will appear
|
||||
# as a single item in place of the strings for the three items Shift_JIS,
|
||||
# EUC-JP, and ISO-2022-JP, so this string does not need to make sense together
|
||||
# with those strings and should be translated the way those were
|
||||
# but omitting the part in parentheses.
|
||||
Japanese = Japanese
|
||||
|
||||
# Korean
|
||||
|
|
|
@ -16,18 +16,16 @@ ChromeUtils.defineModuleGetter(this, "Deprecated",
|
|||
|
||||
const kAutoDetectors = [
|
||||
["off", ""],
|
||||
["ja", "ja_parallel_state_machine"],
|
||||
["ru", "ruprob"],
|
||||
["uk", "ukprob"],
|
||||
];
|
||||
|
||||
/**
|
||||
* This set contains encodings that are in the Encoding Standard, except:
|
||||
* - XSS-dangerous encodings (except ISO-2022-JP which is assumed to be
|
||||
* too common not to be included).
|
||||
* - Japanese encodings are represented by one autodetection item
|
||||
* - x-user-defined, which practically never makes sense as an end-user-chosen
|
||||
* override.
|
||||
* - Encodings that IE11 doesn't have in its correspoding menu.
|
||||
* - Encodings that IE11 doesn't have in its corresponding menu.
|
||||
*/
|
||||
const kEncodings = new Set([
|
||||
// Globally relevant
|
||||
|
@ -60,10 +58,8 @@ const kEncodings = new Set([
|
|||
// Hebrew
|
||||
"windows-1255",
|
||||
"ISO-8859-8",
|
||||
// Japanese
|
||||
"Shift_JIS",
|
||||
"EUC-JP",
|
||||
"ISO-2022-JP",
|
||||
// Japanese (NOT AN ENCODING NAME)
|
||||
"Japanese",
|
||||
// Korean
|
||||
"EUC-KR",
|
||||
// Thai
|
||||
|
@ -95,8 +91,7 @@ function CharsetComparator(a, b) {
|
|||
// happens to make the less frequently-used items first.
|
||||
let titleA = a.label.replace(/\(.*/, "") + b.value;
|
||||
let titleB = b.label.replace(/\(.*/, "") + a.value;
|
||||
// Secondarily reverse sort by encoding name to sort "windows" or
|
||||
// "shift_jis" first.
|
||||
// Secondarily reverse sort by encoding name to sort "windows"
|
||||
return titleA.localeCompare(titleB) || b.value.localeCompare(a.value);
|
||||
}
|
||||
|
||||
|
@ -239,7 +234,17 @@ var CharsetMenu = {
|
|||
* For substantially similar encodings, treat two encodings as the same
|
||||
* for the purpose of the check mark.
|
||||
*/
|
||||
foldCharset(charset) {
|
||||
foldCharset(charset, isAutodetected) {
|
||||
if (isAutodetected) {
|
||||
switch (charset) {
|
||||
case "Shift_JIS":
|
||||
case "EUC-JP":
|
||||
case "ISO-2022-JP":
|
||||
return "Japanese";
|
||||
default:
|
||||
// fall through
|
||||
}
|
||||
}
|
||||
switch (charset) {
|
||||
case "ISO-8859-8-I":
|
||||
return "windows-1255";
|
||||
|
@ -252,8 +257,11 @@ var CharsetMenu = {
|
|||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* This method is for comm-central callers only.
|
||||
*/
|
||||
update(parent, charset) {
|
||||
let menuitem = parent.getElementsByAttribute("charset", this.foldCharset(charset)).item(0);
|
||||
let menuitem = parent.getElementsByAttribute("charset", this.foldCharset(charset, false)).item(0);
|
||||
if (menuitem) {
|
||||
menuitem.setAttribute("checked", "true");
|
||||
}
|
||||
|
|
|
@ -196,6 +196,7 @@ class RemoteWebProgressManager {
|
|||
if (json.charset) {
|
||||
this._browser._characterSet = json.charset;
|
||||
this._browser._mayEnableCharacterEncodingMenu = json.mayEnableCharacterEncodingMenu;
|
||||
this._browser._charsetAutodetected = json.charsetAutodetected;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -116,6 +116,7 @@ class WebProgressChild {
|
|||
json.title = this.mm.content.document.title;
|
||||
json.charset = this.mm.content.document.characterSet;
|
||||
json.mayEnableCharacterEncodingMenu = this.mm.docShell.mayEnableCharacterEncodingMenu;
|
||||
json.charsetAutodetected = this.mm.docShell.charsetAutodetected;
|
||||
json.principal = this.mm.content.document.nodePrincipal;
|
||||
let csp = this.mm.content.document.csp;
|
||||
json.csp = E10SUtils.serializeCSP(csp);
|
||||
|
|
Загрузка…
Ссылка в новой задаче