Merge mozilla-inbound to mozilla-central. a=merge

This commit is contained in:
Cosmin Sabou 2019-06-03 19:01:53 +03:00
Родитель 0a9fe4ff98 48a6016aba
Коммит e13e0af55b
89 изменённых файлов: 1841 добавлений и 3091 удалений

31
Cargo.lock сгенерированный
Просмотреть файл

@ -1009,21 +1009,21 @@ name = "encoding_c"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "encoding_glue"
version = "0.1.0"
dependencies = [
"encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
"nserror 0.1.0",
"nsstring 0.1.0",
]
[[package]]
name = "encoding_rs"
version = "0.8.16"
version = "0.8.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1299,6 +1299,7 @@ dependencies = [
"profiler_helper 0.1.0",
"rsdparsa_capi 0.1.0",
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"shift_or_euc_c 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"storage 0.1.0",
"webrender_bindings 0.1.0",
"xpcom 0.1.0",
@ -2017,7 +2018,7 @@ name = "nsstring"
version = "0.1.0"
dependencies = [
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -2773,6 +2774,24 @@ dependencies = [
"opaque-debug 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "shift_or_euc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "shift_or_euc_c"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
"shift_or_euc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "shlex"
version = "0.1.1"
@ -3805,7 +3824,7 @@ dependencies = [
"checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a"
"checksum ena 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "25b4e5febb25f08c49f1b07dc33a182729a6b21edfb562b5aef95f78e0dbe5bb"
"checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee"
"checksum encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73"
"checksum encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)" = "4155785c79f2f6701f185eb2e6b4caf0555ec03477cb4c70db67b465311620ed"
"checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad"
"checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3"
"checksum euclid 0.19.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d1a7698bdda3d7444a79d33bdc96e8b518d44ea3ff101d8492a6ca1207b886ea"
@ -3967,6 +3986,8 @@ dependencies = [
"checksum serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "44dd2cfde475037451fa99b7e5df77aa3cfd1536575fa8e7a538ab36dcde49ae"
"checksum sha-1 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "23962131a91661d643c98940b20fcaffe62d776a823247be80a48fcb8b6fce68"
"checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d"
"checksum shift_or_euc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"
"checksum shift_or_euc_c 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"
"checksum shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
"checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84"
"checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23"

Просмотреть файл

@ -6615,6 +6615,9 @@ function handleDroppedLink(event, urlOrLinks, nameOrTriggeringPrincipal, trigger
function BrowserSetForcedCharacterSet(aCharset) {
if (aCharset) {
if (aCharset == "Japanese") {
aCharset = "Shift_JIS";
}
gBrowser.selectedBrowser.characterSet = aCharset;
// Save the forced character-set
PlacesUIUtils.setCharsetForPage(getWebNavigation().currentURI,
@ -6629,7 +6632,8 @@ function BrowserCharsetReload() {
}
function UpdateCurrentCharset(target) {
let selectedCharset = CharsetMenu.foldCharset(gBrowser.selectedBrowser.characterSet);
let selectedCharset = CharsetMenu.foldCharset(gBrowser.selectedBrowser.characterSet,
gBrowser.selectedBrowser.charsetAutodetected);
for (let menuItem of target.getElementsByTagName("menuitem")) {
let isSelected = menuItem.getAttribute("charset") === selectedCharset;
menuItem.setAttribute("checked", isSelected);

Просмотреть файл

@ -358,7 +358,8 @@ const CustomizableWidgets = [
},
updateCurrentCharset(aDocument) {
let currentCharset = aDocument.defaultView.gBrowser.selectedBrowser.characterSet;
currentCharset = CharsetMenu.foldCharset(currentCharset);
let {charsetAutodetected} = aDocument.defaultView.gBrowser.selectedBrowser;
currentCharset = CharsetMenu.foldCharset(currentCharset, charsetAutodetected);
let pinnedContainer = aDocument.getElementById("PanelUI-characterEncodingView-pinned");
let charsetContainer = aDocument.getElementById("PanelUI-characterEncodingView-charsets");

Просмотреть файл

@ -1276,7 +1276,6 @@ nsDocShell::GatherCharsetMenuTelemetry() {
case kCharsetFromDocTypeDefault:
case kCharsetFromCache:
case kCharsetFromParentFrame:
case kCharsetFromHintPrevDoc:
// Changing charset on an unlabeled doc.
if (isFileURL) {
Telemetry::AccumulateCategorical(
@ -1892,6 +1891,26 @@ nsDocShell::GetMayEnableCharacterEncodingMenu(
return NS_OK;
}
// Getter for the charsetAutodetected attribute. Writes true to
// |aCharsetAutodetected| only when this docshell has a content viewer with a
// document whose character-set source is kCharsetFromAutoDetection or
// kCharsetFromUserForcedAutoDetection; writes false otherwise. Always returns
// NS_OK.
NS_IMETHODIMP
nsDocShell::GetCharsetAutodetected(bool* aCharsetAutodetected) {
  // Answer used when there is no viewer, no document, or any other source.
  *aCharsetAutodetected = false;

  Document* currentDoc =
      mContentViewer ? mContentViewer->GetDocument() : nullptr;
  if (currentDoc) {
    switch (currentDoc->GetDocumentCharacterSetSource()) {
      case kCharsetFromAutoDetection:
      case kCharsetFromUserForcedAutoDetection:
        *aCharsetAutodetected = true;
        break;
      default:
        break;
    }
  }

  return NS_OK;
}
NS_IMETHODIMP
nsDocShell::GetDocShellEnumerator(int32_t aItemType,
DocShellEnumeratorDirection aDirection,
@ -8372,11 +8391,11 @@ nsresult nsDocShell::SetupNewViewer(nsIContentViewer* aNewViewer) {
const Encoding* forceCharset = nullptr;
const Encoding* hintCharset = nullptr;
int32_t hintCharsetSource;
float textZoom;
float pageZoom;
float overrideDPPX;
bool styleDisabled;
int32_t hintCharsetSource = kCharsetUninitialized;
float textZoom = 1.0;
float pageZoom = 1.0;
float overrideDPPX = 1.0;
bool styleDisabled = false;
// |newMUDV| also serves as a flag to set the data from the above vars
nsCOMPtr<nsIContentViewer> newCv;
@ -10188,6 +10207,8 @@ nsresult nsDocShell::DoURILoad(nsDocShellLoadState* aLoadState,
MOZ_ASSERT(NS_SUCCEEDED(rv));
}
Unused << rv; // Keep Coverity happy
nsCOMPtr<nsIWritablePropertyBag2> props(do_QueryInterface(channel));
if (props) {
// save true referrer for those who need it (e.g. xpinstall whitelisting)

Просмотреть файл

@ -878,6 +878,11 @@ interface nsIDocShell : nsIDocShellTreeItem
*/
[infallible] readonly attribute boolean mayEnableCharacterEncodingMenu;
/**
* Indicates that the character encoding was autodetected.
*/
[infallible] readonly attribute boolean charsetAutodetected;
attribute nsIEditor editor;
readonly attribute boolean editable; /* this docShell is editable */
readonly attribute boolean hasEditingSession; /* this docShell has an editing session */

Просмотреть файл

@ -44,6 +44,14 @@ support-files =
file_bug1415918_beforeunload_iframe_2.html
file_bug1415918_beforeunload_iframe.html
file_bug1415918_beforeunload.html
file_bug1543077-1-child.html
file_bug1543077-1.html
file_bug1543077-2-child.html
file_bug1543077-2.html
file_bug1543077-3-child.html
file_bug1543077-3.html
file_bug1543077-4-child.html
file_bug1543077-4.html
file_multiple_pushState.html
print_postdata.sjs
test-form_sjis.html
@ -60,6 +68,10 @@ support-files =
onpageshow_message.html
file_cross_process_csp_inheritance.html
[browser_bug1543077-1.js]
[browser_bug1543077-2.js]
[browser_bug1543077-3.js]
[browser_bug1543077-4.js]
[browser_bug1206879.js]
[browser_bug1309900_crossProcessHistoryNavigation.js]
[browser_bug1328501.js]

Просмотреть файл

@ -0,0 +1,18 @@
/**
 * Entry point: hands the bug 1543077 case-1 page to runCharsetTest together
 * with the two verification callbacks and the "Japanese" charset menu value.
 * runCharsetTest is defined outside this file; presumably it calls afterOpen
 * after the initial load and afterChangeCharset after forcing the charset.
 */
function test() {
  const rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
  const pageUrl = `${rootDir}file_bug1543077-1.html`;
  runCharsetTest(pageUrl, afterOpen, "Japanese", afterChangeCharset);
}

/**
 * Checks the initial decode: U+00A4 is expected at offset 131 of the parent
 * document's text and at offset 87 of the first frame's text.
 */
function afterOpen() {
  const parentDoc = content.document;
  is(parentDoc.documentElement.textContent.indexOf("\u00A4"), 131, "Parent doc should be windows-1252 initially");

  const childDoc = content.frames[0].document;
  is(childDoc.documentElement.textContent.indexOf("\u00A4"), 87, "Child doc should be windows-1252 initially");
}

/**
 * Checks the decode after the charset change: U+3042 is expected at the same
 * offsets, and both documents are expected to report EUC-JP.
 */
function afterChangeCharset() {
  const parentDoc = content.document;
  is(parentDoc.documentElement.textContent.indexOf("\u3042"), 131, "Parent doc should decode as EUC-JP subsequently");

  const childDoc = content.frames[0].document;
  is(childDoc.documentElement.textContent.indexOf("\u3042"), 87, "Child doc should decode as EUC-JP subsequently");

  is(parentDoc.characterSet, "EUC-JP", "Parent doc should report EUC-JP subsequently");
  is(childDoc.characterSet, "EUC-JP", "Child doc should report EUC-JP subsequently");
}

Просмотреть файл

@ -0,0 +1,18 @@
/**
 * Entry point: hands the bug 1543077 case-2 page to runCharsetTest together
 * with the two verification callbacks and the "Japanese" charset menu value.
 * runCharsetTest is defined outside this file; presumably it calls afterOpen
 * after the initial load and afterChangeCharset after forcing the charset.
 */
function test() {
  const rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
  const pageUrl = `${rootDir}file_bug1543077-2.html`;
  runCharsetTest(pageUrl, afterOpen, "Japanese", afterChangeCharset);
}

/**
 * Checks the initial decode: U+201A is expected at offset 134 of the parent
 * document's text and at offset 90 of the first frame's text.
 */
function afterOpen() {
  const parentDoc = content.document;
  is(parentDoc.documentElement.textContent.indexOf("\u201A"), 134, "Parent doc should be windows-1252 initially");

  const childDoc = content.frames[0].document;
  is(childDoc.documentElement.textContent.indexOf("\u201A"), 90, "Child doc should be windows-1252 initially");
}

/**
 * Checks the decode after the charset change: U+3042 is expected at the same
 * offsets, and both documents are expected to report Shift_JIS.
 */
function afterChangeCharset() {
  const parentDoc = content.document;
  is(parentDoc.documentElement.textContent.indexOf("\u3042"), 134, "Parent doc should decode as Shift_JIS subsequently");

  const childDoc = content.frames[0].document;
  is(childDoc.documentElement.textContent.indexOf("\u3042"), 90, "Child doc should decode as Shift_JIS subsequently");

  is(parentDoc.characterSet, "Shift_JIS", "Parent doc should report Shift_JIS subsequently");
  is(childDoc.characterSet, "Shift_JIS", "Child doc should report Shift_JIS subsequently");
}

Просмотреть файл

@ -0,0 +1,18 @@
/**
 * Entry point: hands the bug 1543077 case-3 page to runCharsetTest together
 * with the two verification callbacks and the "Japanese" charset menu value.
 * runCharsetTest is defined outside this file; presumably it calls afterOpen
 * after the initial load and afterChangeCharset after forcing the charset.
 */
function test() {
  const rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
  const pageUrl = `${rootDir}file_bug1543077-3.html`;
  runCharsetTest(pageUrl, afterOpen, "Japanese", afterChangeCharset);
}

/**
 * Checks the initial decode: U+001B (ESC) is expected at offset 136 of the
 * parent document's text and at offset 92 of the first frame's text.
 */
function afterOpen() {
  const parentDoc = content.document;
  is(parentDoc.documentElement.textContent.indexOf("\u001B"), 136, "Parent doc should be windows-1252 initially");

  const childDoc = content.frames[0].document;
  is(childDoc.documentElement.textContent.indexOf("\u001B"), 92, "Child doc should be windows-1252 initially");
}

/**
 * Checks the decode after the charset change: U+3042 is expected at the same
 * offsets, and both documents are expected to report ISO-2022-JP.
 */
function afterChangeCharset() {
  const parentDoc = content.document;
  is(parentDoc.documentElement.textContent.indexOf("\u3042"), 136, "Parent doc should decode as ISO-2022-JP subsequently");

  const childDoc = content.frames[0].document;
  is(childDoc.documentElement.textContent.indexOf("\u3042"), 92, "Child doc should decode as ISO-2022-JP subsequently");

  is(parentDoc.characterSet, "ISO-2022-JP", "Parent doc should report ISO-2022-JP subsequently");
  is(childDoc.characterSet, "ISO-2022-JP", "Child doc should report ISO-2022-JP subsequently");
}

Просмотреть файл

@ -0,0 +1,18 @@
/**
 * Entry point: hands the bug 1543077 case-4 page to runCharsetTest together
 * with the two verification callbacks and the "Japanese" charset menu value.
 * runCharsetTest is defined outside this file; presumably it calls afterOpen
 * after the initial load and afterChangeCharset after forcing the charset.
 */
function test() {
  const rootDir = "http://mochi.test:8888/browser/docshell/test/browser/";
  const pageUrl = `${rootDir}file_bug1543077-4.html`;
  runCharsetTest(pageUrl, afterOpen, "Japanese", afterChangeCharset);
}

/**
 * Checks the initial decode: U+00A4 is expected at offset 131 of the parent
 * document's text and U+201A at offset 90 of the first frame's text.
 */
function afterOpen() {
  const parentDoc = content.document;
  is(parentDoc.documentElement.textContent.indexOf("\u00A4"), 131, "Parent doc should be windows-1252 initially");

  const childDoc = content.frames[0].document;
  is(childDoc.documentElement.textContent.indexOf("\u201A"), 90, "Child doc should be windows-1252 initially");
}

/**
 * Checks the decode after the charset change: U+3042 is expected at the same
 * offsets, with the parent reporting EUC-JP and the child reporting
 * Shift_JIS.
 */
function afterChangeCharset() {
  const parentDoc = content.document;
  is(parentDoc.documentElement.textContent.indexOf("\u3042"), 131, "Parent doc should decode as EUC-JP subsequently");

  const childDoc = content.frames[0].document;
  is(childDoc.documentElement.textContent.indexOf("\u3042"), 90, "Child doc should decode as Shift_JIS subsequently");

  is(parentDoc.characterSet, "EUC-JP", "Parent doc should report EUC-JP subsequently");
  is(childDoc.characterSet, "Shift_JIS", "Child doc should report Shift_JIS subsequently");
}

Просмотреть файл

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
</body>
</html>

Просмотреть файл

@ -0,0 +1,16 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<h1>No encoding declaration in parent or child</h1>
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
<iframe src="file_bug1543077-1-child.html"></iframe>
</body>
</html>

Просмотреть файл

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<p>Hiragana letter a if decoded as Shift_JIS:  </p>
</body>
</html>

Просмотреть файл

@ -0,0 +1,16 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<h1>No encoding declaration in parent or child</h1>
<p>Hiragana letter a if decoded as Shift_JIS:  </p>
<iframe src="file_bug1543077-2-child.html"></iframe>
</body>
</html>

Просмотреть файл

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<p>Hiragana letter a if decoded as ISO-2022-JP: $B$"(B</p>
</body>
</html>

Просмотреть файл

@ -0,0 +1,16 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<h1>No encoding declaration in parent or child</h1>
<p>Hiragana letter a if decoded as ISO-2022-JP: $B$"(B</p>
<iframe src="file_bug1543077-3-child.html"></iframe>
</body>
</html>

Просмотреть файл

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<p>Hiragana letter a if decoded as Shift_JIS:  </p>
</body>
</html>

Просмотреть файл

@ -0,0 +1,16 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta content="width=device-width, initial-scale=1" name="viewport">
<title>No encoding declaration in parent or child</title>
</head>
<body>
<h1>No encoding declaration in parent or child</h1>
<p>Hiragana letter a if decoded as EUC-JP: ¤¢</p>
<iframe src="file_bug1543077-4-child.html"></iframe>
</body>
</html>

Просмотреть файл

@ -93,6 +93,12 @@ interface nsIBrowser : nsISupports
*/
attribute boolean mayEnableCharacterEncodingMenu;
/**
* Whether or not the character encoding was detected by analyzing
* content (as opposed to reading a protocol label).
*/
attribute boolean charsetAutodetected;
/**
* Called by Gecko to update the browser when its state changes.
*

Просмотреть файл

@ -3539,6 +3539,8 @@ NS_IMETHODIMP BrowserChild::OnStateChange(nsIWebProgress* aWebProgress,
stateChangeData->isNavigating() = docShell->GetIsNavigating();
stateChangeData->mayEnableCharacterEncodingMenu() =
docShell->GetMayEnableCharacterEncodingMenu();
stateChangeData->charsetAutodetected() =
docShell->GetCharsetAutodetected();
if (document && aStateFlags & nsIWebProgressListener::STATE_STOP) {
document->GetContentType(stateChangeData->contentType());

Просмотреть файл

@ -2394,6 +2394,8 @@ mozilla::ipc::IPCResult BrowserParent::RecvOnStateChange(
Unused << browser->SetIsNavigating(aStateChangeData->isNavigating());
Unused << browser->SetMayEnableCharacterEncodingMenu(
aStateChangeData->mayEnableCharacterEncodingMenu());
Unused << browser->SetCharsetAutodetected(
aStateChangeData->charsetAutodetected());
Unused << browser->UpdateForStateChange(aStateChangeData->charset(),
aStateChangeData->documentURI(),
aStateChangeData->contentType());

Просмотреть файл

@ -123,6 +123,7 @@ struct WebProgressStateChangeData
{
bool isNavigating;
bool mayEnableCharacterEncodingMenu;
bool charsetAutodetected;
// The following fields are only set when the aStateFlags param passed with
// this struct is |nsIWebProgress.STATE_STOP|.

Просмотреть файл

@ -4,7 +4,6 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
DIRS += ['src']
TEST_DIRS += ['tests']
with Files('**'):

Просмотреть файл

@ -1,41 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "CharDistribution.h"
#include "JISFreq.tab"
#include "mozilla/ArrayUtils.h"
#define SURE_YES 0.99f
#define SURE_NO 0.01f
// Returns a confidence value derived from the characters counted so far.
// The result is SURE_NO when nothing usable was counted, otherwise the
// frequent/infrequent ratio scaled by mTypicalDistributionRatio, capped at
// SURE_YES.
float CharDistributionAnalysis::GetConfidence(void) {
  // No character in the considered range, or too few frequent characters to
  // clear the threshold: give the negative answer.
  const bool sawNothing = mTotalChars <= 0;
  const bool tooFewFrequent = mFreqChars <= mDataThreshold;
  if (sawNothing || tooFewFrequent) {
    return SURE_NO;
  }

  // Every counted character was a frequent one; the ratio below would divide
  // by zero, so answer with the cap directly.
  if (mTotalChars == mFreqChars) {
    return SURE_YES;
  }

  const float ratio =
      mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);

  // Cap the confidence (we don't want to be 100% sure).
  return ratio < SURE_YES ? ratio : SURE_YES;
}
// Configures the Shift_JIS analyser with the JIS frequency table, that
// table's length, and the JIS typical distribution ratio.
SJISDistributionAnalysis::SJISDistributionAnalysis() {
  mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
  mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
  mCharToFreqOrder = JISCharToFreqOrder;
}
// Configures the EUC-JP analyser with the JIS frequency table, that table's
// length, and the JIS typical distribution ratio.
EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() {
  mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
  mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
  mCharToFreqOrder = JISCharToFreqOrder;
}

Просмотреть файл

@ -1,201 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef CharDistribution_h__
#define CharDistribution_h__
#include "nscore.h"
#define ENOUGH_DATA_THRESHOLD 1024
// Base class for character-distribution analysis. It counts how many of the
// two-byte characters fed to it map to a frequency order below 512 and
// exposes that as a confidence value. Subclasses supply GetOrder() and the
// frequency table.
class CharDistributionAnalysis {
 public:
  CharDistributionAnalysis() { Reset(); }

  // Block-level entry point; intentionally does nothing in this class.
  void HandleData(const char* aBuf, uint32_t aLen) {}

  // Feeds one character of known byte length into the counters.
  void HandleOneChar(const char* aStr, uint32_t aCharLen) {
    // Only two-byte characters take part in the distribution analysis.
    if (aCharLen != 2) {
      return;
    }

    const int32_t order = GetOrder(aStr);
    if (order < 0) {
      // Character is outside the range this analyser cares about.
      return;
    }

    ++mTotalChars;

    // Count the character as "frequent" when its order lies inside the table
    // and its frequency order is below 512.
    const uint32_t tableIndex = static_cast<uint32_t>(order);
    if (tableIndex < mTableSize && mCharToFreqOrder[tableIndex] < 512) {
      ++mFreqChars;
    }
  }

  // Returns a confidence value based on the data seen so far.
  float GetConfidence(void);

  // Resets the analyser, clearing the counters, the threshold and mDone.
  void Reset() {
    mDataThreshold = 0;
    mFreqChars = 0;
    mTotalChars = 0;
    mDone = false;
  }

  // It is not necessary to receive all data to draw a conclusion; for charset
  // detection a certain amount of data is enough.
  bool GotEnoughData() { return ENOUGH_DATA_THRESHOLD < mTotalChars; }

 protected:
  // Characters are not handled by their original encoded bytes; the bytes are
  // converted to a number, here called the order. This lets several encodings
  // of one language share a single frequency table. The base implementation
  // reports "not of interest" (-1) for everything.
  virtual int32_t GetOrder(const char* str) { return -1; }

  // If this flag is true, detection is done and a conclusion has been made.
  bool mDone;

  // Number of characters whose frequency order is less than 512.
  uint32_t mFreqChars;

  // Total number of characters encountered.
  uint32_t mTotalChars;

  // Number of hi-byte characters needed to trigger detection.
  uint32_t mDataThreshold;

  // Mapping table from char order (as returned by GetOrder()) to frequency
  // order.
  const int16_t* mCharToFreqOrder;

  // Number of entries in the table above.
  uint32_t mTableSize;

  // A constant that varies from language to language and is used when
  // calculating confidence. See my paper for further detail.
  float mTypicalDistributionRatio;
};
// Distribution analyser for EUC-TW encoded text.
class EUCTWDistributionAnalysis : public CharDistributionAnalysis {
 public:
  EUCTWDistributionAnalysis();

 protected:
  // For EUC-TW we are interested in
  //   first byte range:  0xc4 -- 0xfe
  //   second byte range: 0xa1 -- 0xfe
  // No validation is done here; the state machine has done that.
  // Returns -1 for lead bytes below 0xc4, otherwise 94 entries per lead byte.
  int32_t GetOrder(const char* str) override {
    const unsigned char lead = static_cast<unsigned char>(str[0]);
    if (lead < 0xc4) {
      return -1;
    }
    const unsigned char trail = static_cast<unsigned char>(str[1]);
    return 94 * (lead - 0xc4) + trail - 0xa1;
  }
};
// Distribution analyser for EUC-KR encoded text.
class EUCKRDistributionAnalysis : public CharDistributionAnalysis {
 public:
  EUCKRDistributionAnalysis();

 protected:
  // For EUC-KR we are interested in
  //   first byte range:  0xb0 -- 0xfe
  //   second byte range: 0xa1 -- 0xfe
  // No validation is done here; the state machine has done that.
  // Returns -1 for lead bytes below 0xb0, otherwise 94 entries per lead byte.
  int32_t GetOrder(const char* str) override {
    const unsigned char lead = static_cast<unsigned char>(str[0]);
    if (lead < 0xb0) {
      return -1;
    }
    const unsigned char trail = static_cast<unsigned char>(str[1]);
    return 94 * (lead - 0xb0) + trail - 0xa1;
  }
};
// Distribution analyser for GB2312 encoded text.
class GB2312DistributionAnalysis : public CharDistributionAnalysis {
 public:
  GB2312DistributionAnalysis();

 protected:
  // For GB2312 we are interested in
  //   first byte range:  0xb0 -- 0xfe
  //   second byte range: 0xa1 -- 0xfe
  // Only the lower bounds are checked here; the state machine is relied on
  // for the rest.
  int32_t GetOrder(const char* str) override {
    const unsigned char lead = static_cast<unsigned char>(str[0]);
    if (lead < 0xb0) {
      return -1;
    }
    // The second byte is only examined once the first one qualifies.
    const unsigned char trail = static_cast<unsigned char>(str[1]);
    if (trail < 0xa1) {
      return -1;
    }
    return 94 * (lead - 0xb0) + trail - 0xa1;
  }
};
// Distribution analyser for Big5 encoded text.
class Big5DistributionAnalysis : public CharDistributionAnalysis {
 public:
  Big5DistributionAnalysis();

 protected:
  // For Big5 we are interested in
  //   first byte range:  0xa4 -- 0xfe
  //   second byte range: 0x40 -- 0x7e, 0xa1 -- 0xfe
  // No validation is done here; the state machine has done that.
  // Each lead byte owns 157 slots: second bytes from 0x40 start at slot 0,
  // second bytes from 0xa1 start at slot 63.
  int32_t GetOrder(const char* str) override {
    const unsigned char lead = static_cast<unsigned char>(str[0]);
    if (lead < 0xa4) {
      return -1;
    }
    const unsigned char trail = static_cast<unsigned char>(str[1]);
    const int32_t rowBase = 157 * (lead - 0xa4);
    if (trail >= 0xa1) {
      return rowBase + trail - 0xa1 + 63;
    }
    return rowBase + trail - 0x40;
  }
};
// Distribution analyser for Shift_JIS encoded text.
class SJISDistributionAnalysis : public CharDistributionAnalysis {
 public:
  SJISDistributionAnalysis();

 protected:
  // For Shift_JIS we are interested in
  //   first byte range:  0x81 -- 0x9f, 0xe0 -- 0xfe
  //   second byte range: 0x40 -- 0x7e, 0x81 -- 0xfe
  // No validation is done here; the state machine has done that.
  // NOTE(review): the code below only accepts lead bytes 0xe0 -- 0xef in the
  // upper range, not up to 0xfe as stated above - confirm which is intended.
  int32_t GetOrder(const char* str) override {
    const unsigned char lead = static_cast<unsigned char>(str[0]);

    // Row index: the 31 lead bytes 0x81..0x9f come first, then 0xe0..0xef.
    int32_t row;
    if (lead >= 0x81 && lead <= 0x9f) {
      row = lead - 0x81;
    } else if (lead >= 0xe0 && lead <= 0xef) {
      row = lead - 0xe0 + 31;
    } else {
      return -1;
    }

    // 188 slots per row; second bytes above 0x7f are shifted down by one.
    const unsigned char trail = static_cast<unsigned char>(str[1]);
    int32_t order = 188 * row + (trail - 0x40);
    if (trail > 0x7f) {
      --order;
    }
    return order;
  }
};
// Distribution analyser for EUC-JP encoded text.
class EUCJPDistributionAnalysis : public CharDistributionAnalysis {
 public:
  EUCJPDistributionAnalysis();

 protected:
  // For EUC-JP we are interested in
  //   first byte range:  0xa0 -- 0xfe
  //   second byte range: 0xa1 -- 0xfe
  // No validation is done here; the state machine has done that.
  // NOTE(review): the guard admits a lead byte of 0xa0 while rows are counted
  // from 0xa1, so that lead byte yields a row of -1 - confirm this is
  // intended.
  int32_t GetOrder(const char* str) override {
    const unsigned char lead = static_cast<unsigned char>(str[0]);
    if (lead < 0xa0) {
      return -1;
    }
    const unsigned char trail = static_cast<unsigned char>(str[1]);
    return 94 * (lead - 0xa1) + trail - 0xa1;
  }
};
#endif // CharDistribution_h__

Просмотреть файл

@ -1,554 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Sampled from about 20M of text material, including literature and computer technology.
// Japanese frequency table, applied to both S-JIS and EUC-JP
//They are sorted in order.
/******************************************************************************
* 128 --> 0.77094
* 256 --> 0.85710
* 512 --> 0.92635
* 1024 --> 0.97130
* 2048 --> 0.99431
*
* Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
* Random Distribution Ratio = 512 / (2965+62+83+86-512) = 0.191
*
* Typical Distribution Ratio, 25% of IDR
*****************************************************************************/
#define JIS_TYPICAL_DISTRIBUTION_RATIO (float) 3.0
// Char to FreqOrder table
static const int16_t JISCharToFreqOrder[] =
{
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, // 16
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, // 32
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, // 48
2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, // 64
2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, // 80
5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, // 96
1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, // 112
5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, // 128
5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, // 144
5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, // 160
5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, // 176
5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, // 192
5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, // 208
1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, // 224
1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, // 240
1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, // 256
2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, // 272
3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, // 288
3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, // 304
4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, // 320
12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, // 336
1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, // 352
109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, // 368
5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, // 384
271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, // 400
32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, // 416
43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, // 432
280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, // 448
54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, // 464
5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, // 480
5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, // 496
5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, // 512
4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, // 528
5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, // 544
5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, // 560
5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, // 576
5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, // 592
5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, // 608
5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, // 624
5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, // 640
5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, // 656
5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, // 672
3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, // 688
5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, // 704
5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, // 720
5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, // 736
5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, // 752
5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, // 768
5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, // 784
5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, // 800
5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, // 816
5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, // 832
5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, // 848
5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, // 864
5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, // 880
5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, // 896
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, // 912
5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, // 928
5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, // 944
5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, // 960
5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, // 976
5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, // 992
5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, // 1008
5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, // 1024
5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, // 1040
5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, // 1056
5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, // 1072
5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, // 1088
5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, // 1104
5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, // 1120
5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, // 1136
5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, // 1152
5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, // 1168
5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, // 1184
5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, // 1200
5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, // 1216
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, // 1232
5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, // 1248
5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, // 1264
5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, // 1280
5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, // 1296
6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, // 1312
6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, // 1328
6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, // 1344
6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, // 1360
6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, // 1376
6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, // 1392
6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, // 1408
6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, // 1424
4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, // 1440
854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, // 1456
665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, // 1472
1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, // 1488
1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, // 1504
896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, // 1520
3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, // 1536
3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, // 1552
804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, // 1568
3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, // 1584
3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, // 1600
586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, // 1616
2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, // 1632
277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, // 1648
3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, // 1664
1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, // 1680
380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, // 1696
1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, // 1712
850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, // 1728
2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, // 1744
2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, // 1760
2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, // 1776
2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, // 1792
1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, // 1808
1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, // 1824
1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, // 1840
1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, // 1856
2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, // 1872
1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, // 1888
2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, // 1904
1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, // 1920
1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, // 1936
1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, // 1952
1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, // 1968
1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, // 1984
1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, // 2000
606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, // 2016
684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, // 2032
1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, // 2048
2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, // 2064
2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, // 2080
2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, // 2096
3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, // 2112
3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, // 2128
884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, // 2144
3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, // 2160
1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, // 2176
861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, // 2192
2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, // 2208
1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, // 2224
576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, // 2240
3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, // 2256
4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, // 2272
2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, // 2288
1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, // 2304
2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, // 2320
1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, // 2336
385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, // 2352
178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, // 2368
1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, // 2384
2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, // 2400
2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, // 2416
2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, // 2432
3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, // 2448
1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, // 2464
2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, // 2480
359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, // 2496
837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, // 2512
855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, // 2528
1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, // 2544
2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, // 2560
633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, // 2576
1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, // 2592
1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, // 2608
353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, // 2624
1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, // 2640
1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, // 2656
1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, // 2672
764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, // 2688
2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, // 2704
278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, // 2720
2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, // 2736
3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, // 2752
2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, // 2768
1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, // 2784
6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, // 2800
1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, // 2816
2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, // 2832
1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, // 2848
470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, // 2864
72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, // 2880
3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, // 2896
3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, // 2912
1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, // 2928
1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, // 2944
1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, // 2960
1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, // 2976
123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, // 2992
913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, // 3008
2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, // 3024
900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, // 3040
3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, // 3056
2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, // 3072
423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, // 3088
1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, // 3104
2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, // 3120
220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, // 3136
1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, // 3152
745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, // 3168
4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, // 3184
2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, // 3200
1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, // 3216
666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, // 3232
1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, // 3248
2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, // 3264
376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, // 3280
6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, // 3296
1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, // 3312
1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, // 3328
2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, // 3344
3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, // 3360
914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, // 3376
3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, // 3392
1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, // 3408
674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, // 3424
1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, // 3440
199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, // 3456
3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, // 3472
370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, // 3488
2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, // 3504
414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, // 3520
4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, // 3536
2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, // 3552
1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, // 3568
1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, // 3584
1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, // 3600
166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, // 3616
1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, // 3632
3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, // 3648
1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, // 3664
3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, // 3680
264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, // 3696
543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, // 3712
983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, // 3728
2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, // 3744
1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, // 3760
867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, // 3776
1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, // 3792
894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, // 3808
1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, // 3824
530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, // 3840
839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, // 3856
480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, // 3872
1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, // 3888
1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, // 3904
2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, // 3920
4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, // 3936
227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, // 3952
1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, // 3968
328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, // 3984
1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, // 4000
3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, // 4016
1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, // 4032
2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, // 4048
2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, // 4064
1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, // 4080
1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, // 4096
2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, // 4112
455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, // 4128
2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, // 4144
1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, // 4160
1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, // 4176
1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, // 4192
1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, // 4208
3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, // 4224
2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, // 4240
2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, // 4256
575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, // 4272
3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, // 4288
3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, // 4304
1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, // 4320
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, // 4336
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, // 4352
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, // 4368 //last 512
/***************************************************************************************
*Everything below is of no interest for detection purpose *
***************************************************************************************
2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, // 4384
6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, // 4400
6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, // 4416
6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, // 4432
6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, // 4448
4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, // 4464
4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, // 4480
3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, // 4496
3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, // 4512
4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, // 4528
3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, // 4544
6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, // 4560
4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, // 4576
6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, // 4592
6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, // 4608
6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, // 4624
6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, // 4640
6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, // 4656
6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, // 4672
3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, // 4688
3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, // 4704
6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, // 4720
2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, // 4736
4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, // 4752
4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, // 4768
4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, // 4784
6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, // 4800
3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, // 4816
4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, // 4832
4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, // 4848
6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, // 4864
4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, // 4880
6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, // 4896
3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, // 4912
2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, // 4928
4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, // 4944
2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, // 4960
6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, // 4976
4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, // 4992
6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, // 5008
6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, // 5024
6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, // 5040
4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, // 5056
6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, // 5072
2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, // 5088
6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, // 5104
4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, // 5120
6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, // 5136
4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, // 5152
4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, // 5168
6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, // 5184
6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, // 5200
6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, // 5216
3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, // 5232
1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, // 5248
3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, // 5264
3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, // 5280
4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, // 5296
6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, // 5312
3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, // 5328
6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, // 5344
3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, // 5360
3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, // 5376
2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, // 5392
6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, // 5408
6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, // 5424
3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, // 5440
6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, // 5456
3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, // 5472
6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, // 5488
6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, // 5504
6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, // 5520
4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, // 5536
6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, // 5552
4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, // 5568
3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, // 5584
3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, // 5600
6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, // 5616
6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, // 5632
4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, // 5648
6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, // 5664
6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, // 5680
6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, // 5696
6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, // 5712
6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, // 5728
6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, // 5744
4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, // 5760
4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, // 5776
3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, // 5792
6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, // 5808
4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, // 5824
2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, // 5840
6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, // 5856
6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, // 5872
4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, // 5888
2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, // 5904
4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, // 5920
2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, // 5936
4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, // 5952
4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, // 5968
4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, // 5984
6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, // 6000
3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, // 6016
6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, // 6032
3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, // 6048
6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, // 6064
2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, // 6080
3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, // 6096
7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, // 6112
2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, // 6128
3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, // 6144
3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, // 6160
3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, // 6176
3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, // 6192
7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, // 6208
7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, // 6224
7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, // 6240
7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, // 6256
7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, // 6272
4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, // 6288
3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, // 6304
3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, // 6320
4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, // 6336
3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, // 6352
3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, // 6368
7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, // 6384
4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, // 6400
7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, // 6416
7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, // 6432
7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, // 6448
7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, // 6464
7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, // 6480
4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, // 6496
4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, // 6512
7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, // 6528
3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, // 6544
4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, // 6560
7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, // 6576
7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, // 6592
4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, // 6608
3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, // 6624
3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, // 6640
7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, // 6656
4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, // 6672
4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, // 6688
4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, // 6704
4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, // 6720
4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, // 6736
4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, // 6752
7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, // 6768
7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, // 6784
7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, // 6800
7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, // 6816
7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, // 6832
2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, // 6848
3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, // 6864
7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, // 6880
7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, // 6896
3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, // 6912
4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, // 6928
3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, // 6944
3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, // 6960
2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, // 6976
7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, // 6992
7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, // 7008
4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, // 7024
3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, // 7040
3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, // 7056
7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, // 7072
7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, // 7088
7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, // 7104
4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, // 7120
7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, // 7136
2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, // 7152
3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, // 7168
4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, // 7184
7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, // 7200
4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, // 7216
4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, // 7232
7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, // 7248
7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, // 7264
5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, // 7280
7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, // 7296
7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, // 7312
7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, // 7328
7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, // 7344
7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, // 7360
5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, // 7376
5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, // 7392
7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, // 7408
3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, // 7424
7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, // 7440
7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, // 7456
3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, // 7472
7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, // 7488
7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, // 7504
1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, // 7520
3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, // 7536
4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, // 7552
2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, // 7568
3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, // 7584
2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, // 7600
5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, // 7616
4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, // 7632
4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, // 7648
5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, // 7664
7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, // 7680
7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, // 7696
7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, // 7712
7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, // 7728
3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, // 7744
7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, // 7760
3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, // 7776
7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, // 7792
4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, // 7808
7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, // 7824
7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, // 7840
7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, // 7856
7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, // 7872
7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, // 7888
7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, // 7904
7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, // 7920
7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, // 7936
7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, // 7952
7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, // 7968
7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, // 7984
7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, // 8000
8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, // 8016
8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, // 8032
8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, // 8048
8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, // 8064
8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, // 8080
8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, // 8096
8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, // 8112
8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, // 8128
8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, // 8144
8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, // 8160
8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, // 8176
8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, // 8192
8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, // 8208
8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, // 8224
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, // 8240
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, // 8256
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, // 8272
****************************************************************************************/
};

Просмотреть файл

@ -1,600 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nscore.h"
#include "JpCntx.h"
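// Hiragana two-character sequence table. jp2CharContext[previous][current]
// holds the frequency category of that pair of consecutive hiragana, where
// both indices are character orders as returned by GetOrder().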
const uint8_t jp2CharContext[83][83] = {
{
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
},
{
2, 4, 0, 4, 0, 3, 0, 4, 0, 3, 4, 4, 4, 2, 4, 3, 3, 4, 3, 2, 3,
3, 4, 2, 3, 3, 3, 2, 4, 1, 4, 3, 3, 1, 5, 4, 3, 4, 3, 4, 3, 5,
3, 0, 3, 5, 4, 2, 0, 3, 1, 0, 3, 3, 0, 3, 3, 0, 1, 1, 0, 4, 3,
0, 3, 3, 0, 4, 0, 2, 0, 3, 5, 5, 5, 5, 4, 0, 4, 1, 0, 3, 4,
},
{
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
},
{
0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 4, 4, 3, 5, 3, 5, 1, 5, 3, 4,
3, 4, 4, 3, 4, 3, 3, 4, 3, 5, 4, 4, 3, 5, 5, 3, 5, 5, 5, 3, 5,
5, 3, 4, 5, 5, 3, 1, 3, 2, 0, 3, 4, 0, 4, 2, 0, 4, 2, 1, 5, 3,
2, 3, 5, 0, 4, 0, 2, 0, 5, 4, 4, 5, 4, 5, 0, 4, 0, 0, 4, 4,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
},
{
0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 5, 4, 3, 3, 3, 3, 4, 3, 5, 4, 4,
3, 5, 4, 4, 3, 4, 3, 4, 4, 4, 4, 5, 3, 4, 4, 3, 4, 5, 5, 4, 5,
5, 1, 4, 5, 4, 3, 0, 3, 3, 1, 3, 3, 0, 4, 4, 0, 3, 3, 1, 5, 3,
3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 0, 4, 1, 1, 3, 4,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
},
{
0, 4, 0, 3, 0, 3, 0, 4, 0, 3, 4, 4, 3, 2, 2, 1, 2, 1, 3, 1, 3,
3, 3, 3, 3, 4, 3, 1, 3, 3, 5, 3, 3, 0, 4, 3, 0, 5, 4, 3, 3, 5,
4, 4, 3, 4, 4, 5, 0, 1, 2, 0, 1, 2, 0, 2, 2, 0, 1, 0, 0, 5, 2,
2, 1, 4, 0, 3, 0, 1, 0, 4, 4, 3, 5, 4, 3, 0, 2, 1, 0, 4, 3,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
},
{
0, 3, 0, 5, 0, 4, 0, 2, 1, 4, 4, 2, 4, 1, 4, 2, 4, 2, 4, 3, 3,
3, 4, 3, 3, 3, 3, 1, 4, 2, 3, 3, 3, 1, 4, 4, 1, 1, 1, 4, 3, 3,
2, 0, 2, 4, 3, 2, 0, 3, 3, 0, 3, 1, 1, 0, 0, 0, 3, 3, 0, 4, 2,
2, 3, 4, 0, 4, 0, 3, 0, 4, 4, 5, 3, 4, 4, 0, 3, 0, 0, 1, 4,
},
{
1, 4, 0, 4, 0, 4, 0, 4, 0, 3, 5, 4, 4, 3, 4, 3, 5, 4, 3, 3, 4,
3, 5, 4, 4, 4, 4, 3, 4, 2, 4, 3, 3, 1, 5, 4, 3, 2, 4, 5, 4, 5,
5, 4, 4, 5, 4, 4, 0, 3, 2, 2, 3, 3, 0, 4, 3, 1, 3, 2, 1, 4, 3,
3, 4, 5, 0, 3, 0, 2, 0, 4, 5, 5, 4, 5, 4, 0, 4, 0, 0, 5, 4,
},
{
0, 5, 0, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 3, 4, 0, 4, 4, 4,
3, 4, 3, 4, 3, 3, 1, 4, 2, 4, 3, 4, 0, 5, 4, 1, 4, 5, 4, 4, 5,
3, 2, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 2, 3, 2, 0, 4, 3, 3, 4, 3,
3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 4, 3, 0, 4, 1, 0, 1, 3,
},
{
0, 3, 1, 4, 0, 3, 0, 2, 0, 3, 4, 4, 3, 1, 4, 2, 3, 3, 4, 3, 4,
3, 4, 3, 4, 4, 3, 2, 3, 1, 5, 4, 4, 1, 4, 4, 3, 5, 4, 4, 3, 5,
5, 4, 3, 4, 4, 3, 1, 2, 3, 1, 2, 2, 0, 3, 2, 0, 3, 1, 0, 5, 3,
3, 3, 4, 3, 3, 3, 3, 4, 4, 4, 4, 5, 4, 2, 0, 3, 3, 2, 4, 3,
},
{
0, 2, 0, 3, 0, 1, 0, 1, 0, 0, 3, 2, 0, 0, 2, 0, 1, 0, 2, 1, 3,
3, 3, 1, 2, 3, 1, 0, 1, 0, 4, 2, 1, 1, 3, 3, 0, 4, 3, 3, 1, 4,
3, 3, 0, 3, 3, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 4, 1,
0, 2, 3, 2, 2, 2, 1, 3, 3, 3, 4, 4, 3, 2, 0, 3, 1, 0, 3, 3,
},
{
0, 4, 0, 4, 0, 3, 0, 3, 0, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 3, 4,
2, 4, 3, 4, 3, 3, 2, 4, 3, 4, 5, 4, 1, 4, 5, 3, 5, 4, 5, 3, 5,
4, 0, 3, 5, 5, 3, 1, 3, 3, 2, 2, 3, 0, 3, 4, 1, 3, 3, 2, 4, 3,
3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 5, 3, 0, 4, 1, 0, 3, 4,
},
{
0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 2, 2, 1, 0, 1, 0, 0, 0, 3, 0, 3,
0, 3, 0, 1, 3, 1, 0, 3, 1, 3, 3, 3, 1, 3, 3, 3, 0, 1, 3, 1, 3,
4, 0, 0, 3, 1, 1, 0, 3, 2, 0, 0, 0, 0, 1, 3, 0, 1, 0, 0, 3, 3,
2, 0, 3, 0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 3, 0, 3, 0, 0, 2, 3,
},
{
2, 3, 0, 3, 0, 2, 0, 1, 0, 3, 3, 4, 3, 1, 3, 1, 1, 1, 3, 1, 4,
3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 4, 3, 1, 4, 3, 2, 5, 5, 4, 4, 4,
4, 3, 3, 4, 4, 4, 0, 2, 1, 1, 3, 2, 0, 1, 2, 0, 0, 1, 0, 4, 1,
3, 3, 3, 0, 3, 0, 1, 0, 4, 4, 4, 5, 5, 3, 0, 2, 0, 0, 4, 4,
},
{
0, 2, 0, 1, 0, 3, 1, 3, 0, 2, 3, 3, 3, 0, 3, 1, 0, 0, 3, 0, 3,
2, 3, 1, 3, 2, 1, 1, 0, 0, 4, 2, 1, 0, 2, 3, 1, 4, 3, 2, 0, 4,
4, 3, 1, 3, 1, 3, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1,
1, 1, 2, 0, 3, 0, 0, 0, 3, 4, 2, 4, 3, 2, 0, 1, 0, 0, 3, 3,
},
{
0, 1, 0, 4, 0, 5, 0, 4, 0, 2, 4, 4, 2, 3, 3, 2, 3, 3, 5, 3, 3,
3, 4, 3, 4, 2, 3, 0, 4, 3, 3, 3, 4, 1, 4, 3, 2, 1, 5, 5, 3, 4,
5, 1, 3, 5, 4, 2, 0, 3, 3, 0, 1, 3, 0, 4, 2, 0, 1, 3, 1, 4, 3,
3, 3, 3, 0, 3, 0, 1, 0, 3, 4, 4, 4, 5, 5, 0, 3, 0, 1, 4, 5,
},
{
0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 3, 1, 3, 0, 4, 0, 1, 1, 3, 0, 3,
4, 3, 2, 3, 1, 0, 3, 3, 2, 3, 1, 3, 0, 2, 3, 0, 2, 1, 4, 1, 2,
2, 0, 0, 3, 3, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 3, 2,
1, 3, 3, 0, 2, 0, 2, 0, 0, 3, 3, 1, 2, 4, 0, 3, 0, 2, 2, 3,
},
{
2, 4, 0, 5, 0, 4, 0, 4, 0, 2, 4, 4, 4, 3, 4, 3, 3, 3, 1, 2, 4,
3, 4, 3, 4, 4, 5, 0, 3, 3, 3, 3, 2, 0, 4, 3, 1, 4, 3, 4, 1, 4,
4, 3, 3, 4, 4, 3, 1, 2, 3, 0, 4, 2, 0, 4, 1, 0, 3, 3, 0, 4, 3,
3, 3, 4, 0, 4, 0, 2, 0, 3, 5, 3, 4, 5, 2, 0, 3, 0, 0, 4, 5,
},
{
0, 3, 0, 4, 0, 1, 0, 1, 0, 1, 3, 2, 2, 1, 3, 0, 3, 0, 2, 0, 2,
0, 3, 0, 2, 0, 0, 0, 1, 0, 1, 1, 0, 0, 3, 1, 0, 0, 0, 4, 0, 3,
1, 0, 2, 1, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 2,
2, 3, 1, 0, 3, 0, 0, 0, 1, 4, 4, 4, 3, 0, 0, 4, 0, 0, 1, 4,
},
{
1, 4, 1, 5, 0, 3, 0, 3, 0, 4, 5, 4, 4, 3, 5, 3, 3, 4, 4, 3, 4,
1, 3, 3, 3, 3, 2, 1, 4, 1, 5, 4, 3, 1, 4, 4, 3, 5, 4, 4, 3, 5,
4, 3, 3, 4, 4, 4, 0, 3, 3, 1, 2, 3, 0, 3, 1, 0, 3, 3, 0, 5, 4,
4, 4, 4, 4, 4, 3, 3, 5, 4, 4, 3, 3, 5, 4, 0, 3, 2, 0, 4, 4,
},
{
0, 2, 0, 3, 0, 1, 0, 0, 0, 1, 3, 3, 3, 2, 4, 1, 3, 0, 3, 1, 3,
0, 2, 2, 1, 1, 0, 0, 2, 0, 4, 3, 1, 0, 4, 3, 0, 4, 4, 4, 1, 4,
3, 1, 1, 3, 3, 1, 0, 2, 0, 0, 1, 3, 0, 0, 0, 0, 2, 0, 0, 4, 3,
2, 4, 3, 5, 4, 3, 3, 3, 4, 3, 3, 4, 3, 3, 0, 2, 1, 0, 3, 3,
},
{
0, 2, 0, 4, 0, 3, 0, 2, 0, 2, 5, 5, 3, 4, 4, 4, 4, 1, 4, 3, 3,
0, 4, 3, 4, 3, 1, 3, 3, 2, 4, 3, 0, 3, 4, 3, 0, 3, 4, 4, 2, 4,
4, 0, 4, 5, 3, 3, 2, 2, 1, 1, 1, 2, 0, 1, 5, 0, 3, 3, 2, 4, 3,
3, 3, 4, 0, 3, 0, 2, 0, 4, 4, 3, 5, 5, 0, 0, 3, 0, 2, 3, 3,
},
{
0, 3, 0, 4, 0, 3, 0, 1, 0, 3, 4, 3, 3, 1, 3, 3, 3, 0, 3, 1, 3,
0, 4, 3, 3, 1, 1, 0, 3, 0, 3, 3, 0, 0, 4, 4, 0, 1, 5, 4, 3, 3,
5, 0, 3, 3, 4, 3, 0, 2, 0, 1, 1, 1, 0, 1, 3, 0, 1, 2, 1, 3, 3,
2, 3, 3, 0, 3, 0, 1, 0, 1, 3, 3, 4, 4, 1, 0, 1, 2, 2, 1, 3,
},
{
0, 1, 0, 4, 0, 4, 0, 3, 0, 1, 3, 3, 3, 2, 3, 1, 1, 0, 3, 0, 3,
3, 4, 3, 2, 4, 2, 0, 1, 0, 4, 3, 2, 0, 4, 3, 0, 5, 3, 3, 2, 4,
4, 4, 3, 3, 3, 4, 0, 1, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 4, 2,
3, 3, 3, 0, 3, 0, 0, 0, 4, 4, 4, 5, 3, 2, 0, 3, 3, 0, 3, 5,
},
{
0, 2, 0, 3, 0, 0, 0, 3, 0, 1, 3, 0, 2, 0, 0, 0, 1, 0, 3, 1, 1,
3, 3, 0, 0, 3, 0, 0, 3, 0, 2, 3, 1, 0, 3, 1, 0, 3, 3, 2, 0, 4,
2, 2, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1,
2, 0, 1, 0, 1, 0, 0, 0, 1, 3, 1, 2, 0, 0, 0, 1, 0, 0, 1, 4,
},
{
0, 3, 0, 3, 0, 5, 0, 1, 0, 2, 4, 3, 1, 3, 3, 2, 1, 1, 5, 2, 1,
0, 5, 1, 2, 0, 0, 0, 3, 3, 2, 2, 3, 2, 4, 3, 0, 0, 3, 3, 1, 3,
3, 0, 2, 5, 3, 4, 0, 3, 3, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0, 2, 2,
3, 3, 3, 0, 2, 0, 1, 0, 3, 4, 4, 2, 5, 4, 0, 3, 0, 0, 3, 5,
},
{
0, 3, 0, 3, 0, 3, 0, 1, 0, 3, 3, 3, 3, 0, 3, 0, 2, 0, 2, 1, 1,
0, 2, 0, 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 3, 2, 0, 0, 3, 3, 1, 2,
3, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 3,
1, 2, 3, 0, 3, 0, 1, 0, 3, 2, 1, 0, 4, 3, 0, 1, 1, 0, 3, 3,
},
{
0, 4, 0, 5, 0, 3, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 4, 3, 5, 3, 3,
2, 5, 3, 4, 4, 4, 3, 4, 3, 4, 5, 5, 3, 4, 4, 3, 4, 4, 5, 4, 4,
4, 3, 4, 5, 5, 4, 2, 3, 4, 2, 3, 4, 0, 3, 3, 1, 4, 3, 2, 4, 3,
3, 5, 5, 0, 3, 0, 3, 0, 5, 5, 5, 5, 4, 4, 0, 4, 0, 1, 4, 4,
},
{
0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 5, 4, 4, 2, 3, 2, 5, 1, 3, 2, 5,
1, 4, 2, 3, 2, 3, 3, 4, 3, 3, 3, 3, 2, 5, 4, 1, 3, 3, 5, 3, 4,
4, 0, 4, 4, 3, 1, 1, 3, 1, 0, 2, 3, 0, 2, 3, 0, 3, 0, 0, 4, 3,
1, 3, 4, 0, 3, 0, 2, 0, 4, 4, 4, 3, 4, 5, 0, 4, 0, 0, 3, 4,
},
{
0, 3, 0, 3, 0, 3, 1, 2, 0, 3, 4, 4, 3, 3, 3, 0, 2, 2, 4, 3, 3,
1, 3, 3, 3, 1, 1, 0, 3, 1, 4, 3, 2, 3, 4, 4, 2, 4, 4, 4, 3, 4,
4, 3, 2, 4, 4, 3, 1, 3, 3, 1, 3, 3, 0, 4, 1, 0, 2, 2, 1, 4, 3,
2, 3, 3, 5, 4, 3, 3, 5, 4, 4, 3, 3, 0, 4, 0, 3, 2, 2, 4, 4,
},
{
0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 2, 1, 3, 0, 0, 0, 0, 0, 2, 0, 1,
2, 1, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 1, 3, 1, 0, 0, 0,
1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
2, 2, 0, 3, 4, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
},
{
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 4, 1, 4, 0, 3, 0, 4, 0, 3,
0, 4, 0, 3, 0, 3, 0, 4, 1, 5, 1, 4, 0, 0, 3, 0, 5, 0, 5, 2, 0,
1, 0, 0, 0, 2, 1, 4, 0, 1, 3, 0, 0, 3, 0, 0, 3, 1, 1, 4, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
},
{
1, 4, 0, 5, 0, 3, 0, 2, 0, 3, 5, 4, 4, 3, 4, 3, 5, 3, 4, 3, 3,
0, 4, 3, 3, 3, 3, 3, 3, 2, 4, 4, 3, 1, 3, 4, 4, 5, 4, 4, 3, 4,
4, 1, 3, 5, 4, 3, 3, 3, 1, 2, 2, 3, 3, 1, 3, 1, 3, 3, 3, 5, 3,
3, 4, 5, 0, 3, 0, 3, 0, 3, 4, 3, 4, 4, 3, 0, 3, 0, 2, 4, 3,
},
{
0, 1, 0, 4, 0, 0, 0, 0, 0, 1, 4, 0, 4, 1, 4, 2, 4, 0, 3, 0, 1,
0, 1, 0, 0, 0, 0, 0, 2, 0, 3, 1, 1, 1, 0, 3, 0, 0, 0, 1, 2, 1,
0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 3, 2,
0, 2, 2, 0, 1, 0, 0, 0, 2, 3, 2, 3, 3, 0, 0, 0, 0, 2, 1, 0,
},
{
0, 5, 1, 5, 0, 3, 0, 3, 0, 5, 4, 4, 5, 1, 5, 3, 3, 0, 4, 3, 4,
3, 5, 3, 4, 3, 3, 2, 4, 3, 4, 3, 3, 0, 3, 3, 1, 4, 4, 3, 4, 4,
4, 3, 4, 5, 5, 3, 2, 3, 1, 1, 3, 3, 1, 3, 1, 1, 3, 3, 2, 4, 5,
3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 5, 3, 3, 0, 3, 4, 0, 4, 3,
},
{
0, 5, 0, 5, 0, 3, 0, 2, 0, 4, 4, 3, 5, 2, 4, 3, 3, 3, 4, 4, 4,
3, 5, 3, 5, 3, 3, 1, 4, 0, 4, 3, 3, 0, 3, 3, 0, 4, 4, 4, 4, 5,
4, 3, 3, 5, 5, 3, 2, 3, 1, 2, 3, 2, 0, 1, 0, 0, 3, 2, 2, 4, 4,
3, 1, 5, 0, 4, 0, 3, 0, 4, 3, 1, 3, 2, 1, 0, 3, 3, 0, 3, 3,
},
{
0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 5, 5, 3, 4, 3, 3, 2, 5, 4, 4,
3, 5, 3, 5, 3, 4, 0, 4, 3, 4, 4, 3, 2, 4, 4, 3, 4, 5, 4, 4, 5,
5, 0, 3, 5, 5, 4, 1, 3, 3, 2, 3, 3, 1, 3, 1, 0, 4, 3, 1, 4, 4,
3, 4, 5, 0, 4, 0, 2, 0, 4, 3, 4, 4, 3, 3, 0, 4, 0, 0, 5, 5,
},
{
0, 4, 0, 4, 0, 5, 0, 1, 1, 3, 3, 4, 4, 3, 4, 1, 3, 0, 5, 1, 3,
0, 3, 1, 3, 1, 1, 0, 3, 0, 3, 3, 4, 0, 4, 3, 0, 4, 4, 4, 3, 4,
4, 0, 3, 5, 4, 1, 0, 3, 0, 0, 2, 3, 0, 3, 1, 0, 3, 1, 0, 3, 2,
1, 3, 5, 0, 3, 0, 1, 0, 3, 2, 3, 3, 4, 4, 0, 2, 2, 0, 4, 4,
},
{
2, 4, 0, 5, 0, 4, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 5, 3, 5, 3, 5,
2, 5, 3, 4, 3, 3, 4, 3, 4, 5, 3, 2, 1, 5, 4, 3, 2, 3, 4, 5, 3,
4, 1, 2, 5, 4, 3, 0, 3, 3, 0, 3, 2, 0, 2, 3, 0, 4, 1, 0, 3, 4,
3, 3, 5, 0, 3, 0, 1, 0, 4, 5, 5, 5, 4, 3, 0, 4, 2, 0, 3, 5,
},
{
0, 5, 0, 4, 0, 4, 0, 2, 0, 5, 4, 3, 4, 3, 4, 3, 3, 3, 4, 3, 4,
2, 5, 3, 5, 3, 4, 1, 4, 3, 4, 4, 4, 0, 3, 5, 0, 4, 4, 4, 4, 5,
3, 1, 3, 4, 5, 3, 3, 3, 3, 3, 3, 3, 0, 2, 2, 0, 3, 3, 2, 4, 3,
3, 3, 5, 3, 4, 1, 3, 3, 5, 3, 2, 0, 0, 0, 0, 4, 3, 1, 3, 3,
},
{
0, 1, 0, 3, 0, 3, 0, 1, 0, 1, 3, 3, 3, 2, 3, 3, 3, 0, 3, 0, 0,
0, 3, 1, 3, 0, 0, 0, 2, 2, 2, 3, 0, 0, 3, 2, 0, 1, 2, 4, 1, 3,
3, 0, 0, 3, 3, 3, 0, 1, 0, 0, 2, 1, 0, 0, 3, 0, 3, 1, 0, 3, 0,
0, 1, 3, 0, 2, 0, 1, 0, 3, 3, 1, 3, 3, 0, 0, 1, 1, 0, 3, 3,
},
{
0, 2, 0, 3, 0, 2, 1, 4, 0, 2, 2, 3, 1, 1, 3, 1, 1, 0, 2, 0, 3,
1, 2, 3, 1, 3, 0, 0, 1, 0, 4, 3, 2, 3, 3, 3, 1, 4, 2, 3, 3, 3,
3, 1, 0, 3, 1, 4, 0, 1, 1, 0, 1, 2, 0, 1, 1, 0, 1, 1, 0, 3, 1,
3, 2, 2, 0, 1, 0, 0, 0, 2, 3, 3, 3, 1, 0, 0, 0, 0, 0, 2, 3,
},
{
0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 5, 5, 3, 3, 4, 3, 3, 1, 5, 4, 4,
2, 4, 4, 4, 3, 4, 2, 4, 3, 5, 5, 4, 3, 3, 4, 3, 3, 5, 5, 4, 5,
5, 1, 3, 4, 5, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 1, 4, 5,
3, 3, 5, 0, 4, 0, 3, 0, 5, 3, 3, 1, 4, 3, 0, 4, 0, 1, 5, 3,
},
{
0, 5, 0, 5, 0, 4, 0, 2, 0, 4, 4, 3, 4, 3, 3, 3, 3, 3, 5, 4, 4,
4, 4, 4, 4, 5, 3, 3, 5, 2, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4, 5, 5,
3, 3, 4, 3, 4, 3, 3, 4, 3, 3, 3, 3, 1, 2, 2, 1, 4, 3, 3, 5, 4,
4, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 4, 4, 1, 0, 4, 2, 0, 2, 4,
},
{
0, 4, 0, 4, 0, 3, 0, 1, 0, 3, 5, 2, 3, 0, 3, 0, 2, 1, 4, 2, 3,
3, 4, 1, 4, 3, 3, 2, 4, 1, 3, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 5,
3, 3, 3, 3, 3, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 0, 0, 3, 1,
2, 2, 3, 0, 3, 0, 2, 0, 4, 4, 3, 3, 4, 1, 0, 3, 0, 0, 2, 4,
},
{
0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1,
0, 2, 0, 1, 0, 0, 0, 0, 0, 3, 1, 3, 0, 3, 2, 0, 0, 0, 1, 0, 3,
2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 2, 0, 0, 0, 0, 0, 0, 2,
},
{
0, 2, 1, 3, 0, 2, 0, 2, 0, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 3,
3, 4, 2, 2, 1, 2, 1, 4, 0, 4, 3, 1, 3, 3, 3, 2, 4, 3, 5, 4, 3,
3, 3, 3, 3, 3, 3, 0, 1, 3, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 4, 2,
0, 2, 3, 0, 3, 3, 0, 3, 3, 4, 2, 3, 1, 4, 0, 1, 2, 0, 2, 3,
},
{
0, 3, 0, 3, 0, 1, 0, 3, 0, 2, 3, 3, 3, 0, 3, 1, 2, 0, 3, 3, 2,
3, 3, 2, 3, 2, 3, 1, 3, 0, 4, 3, 2, 0, 3, 3, 1, 4, 3, 3, 2, 3,
4, 3, 1, 3, 3, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 4, 1,
1, 0, 3, 0, 3, 1, 0, 2, 3, 3, 3, 3, 3, 1, 0, 0, 2, 0, 3, 3,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0,
0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 1, 0, 1, 0, 1, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 3, 0, 2, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3,
},
{
0, 2, 0, 3, 1, 3, 0, 3, 0, 2, 3, 3, 3, 1, 3, 1, 3, 1, 3, 1, 3,
3, 3, 1, 3, 0, 2, 3, 1, 1, 4, 3, 3, 2, 3, 3, 1, 2, 2, 4, 1, 3,
3, 0, 1, 4, 2, 3, 0, 1, 3, 0, 3, 0, 0, 1, 3, 0, 2, 0, 0, 3, 3,
2, 1, 3, 0, 3, 0, 2, 0, 3, 4, 4, 4, 3, 1, 0, 3, 0, 0, 3, 3,
},
{
0, 2, 0, 1, 0, 2, 0, 0, 0, 1, 3, 2, 2, 1, 3, 0, 1, 1, 3, 0, 3,
2, 3, 1, 2, 0, 2, 0, 1, 1, 3, 3, 3, 0, 3, 3, 1, 1, 2, 3, 2, 3,
3, 1, 2, 3, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 2, 1,
2, 1, 3, 0, 3, 0, 0, 0, 3, 4, 4, 4, 3, 2, 0, 2, 0, 0, 2, 4,
},
{
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 3,
},
{
0, 3, 0, 3, 0, 2, 0, 3, 0, 3, 3, 3, 2, 3, 2, 2, 2, 0, 3, 1, 3,
3, 3, 2, 3, 3, 0, 0, 3, 0, 3, 2, 2, 0, 2, 3, 1, 4, 3, 4, 3, 3,
2, 3, 1, 5, 4, 4, 0, 3, 1, 2, 1, 3, 0, 3, 1, 1, 2, 0, 2, 3, 1,
3, 1, 3, 0, 3, 0, 1, 0, 3, 3, 4, 4, 2, 1, 0, 2, 1, 0, 2, 4,
},
{
0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 4, 2, 5, 1, 4, 0, 2, 0, 2, 1, 3,
1, 4, 0, 2, 1, 0, 0, 2, 1, 4, 1, 1, 0, 3, 3, 0, 5, 1, 3, 2, 3,
3, 1, 0, 3, 2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0,
1, 0, 3, 0, 2, 0, 1, 0, 3, 3, 3, 4, 3, 3, 0, 0, 0, 0, 2, 3,
},
{
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 3,
},
{
0, 1, 0, 3, 0, 4, 0, 3, 0, 2, 4, 3, 1, 0, 3, 2, 2, 1, 3, 1, 2,
2, 3, 1, 1, 1, 2, 1, 3, 0, 1, 2, 0, 1, 3, 2, 1, 3, 0, 5, 5, 1,
0, 0, 1, 3, 2, 1, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 3, 4, 0, 1, 1,
1, 3, 2, 0, 2, 0, 1, 0, 2, 3, 3, 1, 2, 3, 0, 1, 0, 1, 0, 4,
},
{
0, 0, 0, 1, 0, 3, 0, 3, 0, 2, 2, 1, 0, 0, 4, 0, 3, 0, 3, 1, 3,
0, 3, 0, 3, 0, 1, 0, 3, 0, 3, 1, 3, 0, 3, 3, 0, 0, 1, 2, 1, 1,
1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
1, 2, 0, 0, 2, 0, 0, 0, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 4,
},
{
0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 3, 0, 1, 0, 2, 0, 1,
0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 2, 0, 2, 3, 0, 0, 2, 2, 3, 1,
2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 2, 0, 0, 0, 0, 2, 3,
},
{
2, 4, 0, 5, 0, 5, 0, 4, 0, 3, 4, 3, 3, 3, 4, 3, 3, 3, 4, 3, 4,
4, 5, 4, 5, 5, 5, 2, 3, 0, 5, 5, 4, 1, 5, 4, 3, 1, 5, 4, 3, 4,
4, 3, 3, 4, 3, 3, 0, 3, 2, 0, 2, 3, 0, 3, 0, 0, 3, 3, 0, 5, 3,
2, 3, 3, 0, 3, 0, 3, 0, 3, 4, 5, 4, 5, 3, 0, 4, 3, 0, 3, 4,
},
{
0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 3, 4, 3, 2, 3, 2, 3, 0, 4, 3, 3,
3, 3, 3, 3, 3, 3, 0, 3, 2, 4, 3, 3, 1, 3, 4, 3, 4, 4, 4, 3, 4,
4, 3, 2, 4, 4, 1, 0, 2, 0, 0, 1, 1, 0, 2, 0, 0, 3, 1, 0, 5, 3,
2, 1, 3, 0, 3, 0, 1, 2, 4, 3, 2, 4, 3, 3, 0, 3, 2, 0, 4, 4,
},
{
0, 3, 0, 3, 0, 1, 0, 0, 0, 1, 4, 3, 3, 2, 3, 1, 3, 1, 4, 2, 3,
2, 4, 2, 3, 4, 3, 0, 2, 2, 3, 3, 3, 0, 3, 3, 3, 0, 3, 4, 1, 3,
3, 0, 3, 4, 3, 3, 0, 1, 1, 0, 1, 0, 0, 0, 4, 0, 3, 0, 0, 3, 1,
2, 1, 3, 0, 4, 0, 1, 0, 4, 3, 3, 4, 3, 3, 0, 2, 0, 0, 3, 3,
},
{
0, 3, 0, 4, 0, 1, 0, 3, 0, 3, 4, 3, 3, 0, 3, 3, 3, 1, 3, 1, 3,
3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 3, 3, 1, 3, 3, 2, 5, 4, 3, 3, 4,
5, 3, 2, 5, 3, 4, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 4, 2,
2, 1, 3, 0, 3, 0, 2, 0, 4, 4, 3, 5, 3, 2, 0, 1, 1, 0, 3, 4,
},
{
0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 4, 3, 3, 2, 3, 3, 3, 1, 4, 3, 4,
1, 5, 3, 4, 3, 4, 0, 4, 2, 4, 3, 4, 1, 5, 4, 0, 4, 4, 4, 4, 5,
4, 1, 3, 5, 4, 2, 1, 4, 1, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 4, 3,
3, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 3, 3, 3, 0, 4, 2, 0, 3, 4,
},
{
1, 4, 0, 4, 0, 3, 0, 1, 0, 3, 3, 3, 1, 1, 3, 3, 2, 2, 3, 3, 1,
0, 3, 2, 2, 1, 2, 0, 3, 1, 2, 1, 2, 0, 3, 2, 0, 2, 2, 3, 3, 4,
3, 0, 3, 3, 1, 2, 0, 1, 1, 3, 1, 2, 0, 0, 3, 0, 1, 1, 0, 3, 2,
2, 3, 3, 0, 3, 0, 0, 0, 2, 3, 3, 4, 3, 3, 0, 1, 0, 0, 1, 4,
},
{
0, 4, 0, 4, 0, 4, 0, 0, 0, 3, 4, 4, 3, 1, 4, 2, 3, 2, 3, 3, 3,
1, 4, 3, 4, 0, 3, 0, 4, 2, 3, 3, 2, 2, 5, 4, 2, 1, 3, 4, 3, 4,
3, 1, 3, 3, 4, 2, 0, 2, 1, 0, 3, 3, 0, 0, 2, 0, 3, 1, 0, 4, 4,
3, 4, 3, 0, 4, 0, 1, 0, 2, 4, 4, 4, 4, 4, 0, 3, 2, 0, 3, 3,
},
{
0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 0, 0, 1, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2,
},
{
0, 2, 0, 3, 0, 4, 0, 4, 0, 1, 3, 3, 3, 0, 4, 0, 2, 1, 2, 1, 1,
1, 2, 0, 3, 1, 1, 0, 1, 0, 3, 1, 0, 0, 3, 3, 2, 0, 1, 1, 0, 0,
0, 0, 0, 1, 0, 2, 0, 2, 2, 0, 3, 1, 0, 0, 1, 0, 1, 1, 0, 1, 2,
0, 3, 0, 0, 0, 0, 1, 0, 0, 3, 3, 4, 3, 1, 0, 1, 0, 3, 0, 2,
},
{
0, 0, 0, 3, 0, 5, 0, 0, 0, 0, 1, 0, 2, 0, 3, 1, 0, 1, 3, 0, 0,
0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 4, 0, 0, 0, 2, 3, 0, 1,
4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 3,
},
{
0, 2, 0, 5, 0, 5, 0, 1, 0, 2, 4, 3, 3, 2, 5, 1, 3, 2, 3, 3, 3,
0, 4, 1, 2, 0, 3, 0, 4, 0, 2, 2, 1, 1, 5, 3, 0, 0, 1, 4, 2, 3,
2, 0, 3, 3, 3, 2, 0, 2, 4, 1, 1, 2, 0, 1, 1, 0, 3, 1, 0, 1, 3,
1, 2, 3, 0, 2, 0, 0, 0, 1, 3, 5, 4, 4, 4, 0, 3, 0, 0, 1, 3,
},
{
0, 4, 0, 5, 0, 4, 0, 4, 0, 4, 5, 4, 3, 3, 4, 3, 3, 3, 4, 3, 4,
4, 5, 3, 4, 5, 4, 2, 4, 2, 3, 4, 3, 1, 4, 4, 1, 3, 5, 4, 4, 5,
5, 4, 4, 5, 5, 5, 2, 3, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3,
4, 4, 4, 0, 3, 0, 4, 0, 3, 3, 4, 4, 5, 0, 0, 4, 3, 0, 4, 5,
},
{
0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 4, 4, 4, 3, 3, 2, 4, 3, 4, 3, 4,
3, 5, 3, 4, 3, 2, 1, 4, 2, 4, 4, 3, 1, 3, 4, 2, 4, 5, 5, 3, 4,
5, 4, 1, 5, 4, 3, 0, 3, 2, 2, 3, 2, 1, 3, 1, 0, 3, 3, 3, 5, 3,
3, 3, 5, 4, 4, 2, 3, 3, 4, 3, 3, 3, 2, 1, 0, 3, 2, 1, 4, 3,
},
{
0, 4, 0, 5, 0, 4, 0, 3, 0, 3, 5, 5, 3, 2, 4, 3, 4, 0, 5, 4, 4,
1, 4, 4, 4, 3, 3, 3, 4, 3, 5, 5, 2, 3, 3, 4, 1, 2, 5, 5, 3, 5,
5, 2, 3, 5, 5, 4, 0, 3, 2, 0, 3, 3, 1, 1, 5, 1, 4, 1, 0, 4, 3,
2, 3, 5, 0, 4, 0, 3, 0, 5, 4, 3, 4, 3, 0, 0, 4, 1, 0, 4, 4,
},
{
1, 3, 0, 4, 0, 2, 0, 2, 0, 2, 5, 5, 3, 3, 3, 3, 3, 0, 4, 2, 3,
4, 4, 4, 3, 4, 0, 0, 3, 4, 5, 4, 3, 3, 3, 3, 2, 5, 5, 4, 5, 5,
5, 4, 3, 5, 5, 5, 1, 3, 1, 0, 1, 0, 0, 3, 2, 0, 4, 2, 0, 5, 2,
3, 2, 4, 1, 3, 0, 3, 0, 4, 5, 4, 5, 4, 3, 0, 4, 2, 0, 5, 4,
},
{
0, 3, 0, 4, 0, 5, 0, 3, 0, 3, 4, 4, 3, 2, 3, 2, 3, 3, 3, 3, 3,
2, 4, 3, 3, 2, 2, 0, 3, 3, 3, 3, 3, 1, 3, 3, 3, 0, 4, 4, 3, 4,
4, 1, 1, 4, 4, 2, 0, 3, 1, 0, 1, 1, 0, 4, 1, 0, 2, 3, 1, 3, 3,
1, 3, 4, 0, 3, 0, 1, 0, 3, 1, 3, 0, 0, 1, 0, 2, 0, 0, 4, 4,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
},
{
0, 3, 0, 3, 0, 2, 0, 3, 0, 1, 5, 4, 3, 3, 3, 1, 4, 2, 1, 2, 3,
4, 4, 2, 4, 4, 5, 0, 3, 1, 4, 3, 4, 0, 4, 3, 3, 3, 2, 3, 2, 5,
3, 4, 3, 2, 2, 3, 0, 0, 3, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1,
1, 3, 1, 0, 2, 0, 4, 0, 3, 4, 4, 4, 5, 2, 0, 2, 0, 0, 1, 3,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
1, 1, 0, 0, 1, 1, 0, 0, 0, 4, 2, 1, 1, 0, 1, 0, 3, 2, 0, 0, 3,
1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
1, 0, 0, 0, 2, 0, 0, 0, 1, 4, 0, 4, 2, 1, 0, 0, 0, 0, 0, 1,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 1,
2, 1, 0, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3,
1, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2,
},
{
0, 4, 0, 4, 0, 4, 0, 3, 0, 4, 4, 3, 4, 2, 4, 3, 2, 0, 4, 4, 4,
3, 5, 3, 5, 3, 3, 2, 4, 2, 4, 3, 4, 3, 1, 4, 0, 2, 3, 4, 4, 4,
3, 3, 3, 4, 4, 4, 3, 4, 1, 3, 4, 3, 2, 1, 2, 1, 3, 3, 3, 4, 4,
3, 3, 5, 0, 4, 0, 3, 0, 4, 3, 3, 3, 2, 1, 0, 3, 0, 0, 3, 3,
},
{
0, 4, 0, 3, 0, 3, 0, 3, 0, 3, 5, 5, 3, 3, 3, 3, 4, 3, 4, 3, 3,
3, 4, 4, 4, 3, 3, 3, 3, 4, 3, 5, 3, 3, 1, 3, 2, 4, 5, 5, 5, 5,
4, 3, 4, 5, 5, 3, 2, 2, 3, 3, 3, 3, 2, 3, 3, 1, 2, 3, 2, 4, 3,
3, 3, 4, 0, 4, 0, 2, 0, 4, 3, 2, 2, 1, 2, 0, 3, 0, 0, 4, 1,
},
};
#define MINIMUM_DATA_THRESHOLD 4
// Feeds a raw byte buffer to the analyser. Every pair of adjacent hiragana
// characters (as reported by GetOrder) is counted into the frequency category
// that jp2CharContext assigns to that pair.
//
// aBuf: bytes to analyse; need not start or end on a character boundary.
// aLen: number of valid bytes in aBuf.
//
// Does nothing once mDone is set. Sets mDone after more than
// MAX_REL_THRESHOLD pairs have been counted.
void JapaneseContextAnalysis::HandleData(const char* aBuf, uint32_t aLen) {
  if (mDone) return;

  // The buffer is byte oriented, so a character may span two (or more)
  // buffers. When the previous buffer ended inside a character we recorded
  // how many bytes are still needed to complete it and skip them here.
  // Recording the bytes and analysing the completed character would be
  // possible, but a single character makes little difference, so skipping it
  // simplifies the logic and improves performance.
  uint32_t i = mNeedToSkipCharNum;
  if (i >= aLen) {
    // The whole buffer is consumed by the pending skip; carry the rest over.
    mNeedToSkipCharNum = i - aLen;
    return;
  }
  // The pending skip is fully consumed by this buffer. It must be cleared,
  // otherwise every later buffer would drop its leading bytes as well.
  mNeedToSkipCharNum = 0;

  while (i < aLen) {
    // GetOrder may look at the byte after the lead byte. When the lead byte
    // is the last one in the buffer, hand it a zero-padded copy so it never
    // reads past aBuf + aLen. The pad byte is outside both hiragana trail
    // ranges, and a truncated character is discarded below anyway.
    const char* cur = aBuf + i;
    char lastByte[2];
    if (i + 1 == aLen) {
      lastByte[0] = aBuf[i];
      lastByte[1] = 0;
      cur = lastByte;
    }

    uint32_t charLen;
    const int32_t order = GetOrder(cur, &charLen);
    i += charLen;

    if (i > aLen) {
      // Character continues in the next buffer: remember how much to skip
      // and break the pair chain.
      mNeedToSkipCharNum = i - aLen;
      mLastCharOrder = -1;
      break;
    }

    if (order != -1 && mLastCharOrder != -1) {
      mTotalRel++;
      if (mTotalRel > MAX_REL_THRESHOLD) {
        mDone = true;
        break;
      }
      mRelSample[jp2CharContext[mLastCharOrder][order]]++;
    }
    mLastCharOrder = order;
  }
}
// Puts the analyser back into its freshly-constructed state: no pairs
// counted, no previous character, nothing to skip, not done.
void JapaneseContextAnalysis::Reset() {
  mDone = false;
  mLastCharOrder = -1;
  mNeedToSkipCharNum = 0;
  mDataThreshold = 0;
  mTotalRel = 0;
  // Clear every per-category pair counter.
  for (uint32_t& counter : mRelSample) {
    counter = 0;
  }
}
#define DONT_KNOW (float)-1
// Returns the fraction of counted character pairs that did NOT land in
// category 0, or DONT_KNOW when no more than mDataThreshold pairs have been
// counted so far. This is just one way to calculate confidence.
float JapaneseContextAnalysis::GetConfidence(void) {
  if (mTotalRel <= mDataThreshold) {
    return (float)DONT_KNOW;
  }
  const uint32_t outsideCategoryZero = mTotalRel - mRelSample[0];
  return ((float)outsideCategoryZero) / mTotalRel;
}
// Classifies the Shift_JIS character starting at str.
//
// *charLen receives 2 when the first byte is in 0x81-0x9f or 0xe0-0xfc,
// otherwise 1. The return value is the zero-based hiragana index
// (second byte minus 0x9f) when the character is 0x82 followed by a byte in
// 0x9f-0xf1, and -1 for anything else. The second byte is only read when the
// first byte is 0x82.
int32_t SJISContextAnalysis::GetOrder(const char* str, uint32_t* charLen) {
  const unsigned char lead = (unsigned char)*str;

  const bool inLowLeadRange = lead >= 0x81 && lead <= 0x9f;
  const bool inHighLeadRange = lead >= 0xe0 && lead <= 0xfc;
  *charLen = (inLowLeadRange || inHighLeadRange) ? 2 : 1;

  // Only the 0x82 row contains hiragana.
  if (lead != 0x82) {
    return -1;
  }
  const unsigned char trail = (unsigned char)*(str + 1);
  if (trail < 0x9f || trail > 0xf1) {
    return -1;
  }
  return trail - 0x9f;
}
// Classifies the EUC-JP character starting at str.
//
// *charLen receives 2 when the first byte is 0x8e or in 0xa1-0xfe, 3 when it
// is 0x8f, otherwise 1. The return value is the zero-based hiragana index
// (second byte minus 0xa1) when the character is 0xa4 followed by a byte in
// 0xa1-0xf3, and -1 for anything else. The second byte is only read when the
// first byte is 0xa4.
int32_t EUCJPContextAnalysis::GetOrder(const char* str, uint32_t* charLen) {
  const unsigned char lead = (unsigned char)*str;

  uint32_t length = 1;
  if (lead == 0x8e || (lead >= 0xa1 && lead <= 0xfe)) {
    length = 2;
  } else if (lead == 0x8f) {
    length = 3;
  }
  *charLen = length;

  // Only the 0xa4 row contains hiragana.
  if (lead != 0xa4) {
    return -1;
  }
  const unsigned char trail = (unsigned char)*(str + 1);
  if (trail < 0xa1 || trail > 0xf3) {
    return -1;
  }
  return trail - 0xa1;
}

Просмотреть файл

@ -1,97 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef __JPCNTX_H__
#define __JPCNTX_H__
#define NUM_OF_CATEGORY 6
#include "nscore.h"
#define ENOUGH_REL_THRESHOLD 100
#define MAX_REL_THRESHOLD 1000
// hiragana frequency category table
extern const uint8_t jp2CharContext[83][83];
// Base class for the hiragana context analysers. It counts how often each
// pair of consecutive hiragana characters falls into each of the
// NUM_OF_CATEGORY categories listed in jp2CharContext; subclasses supply the
// encoding-specific mapping from bytes to a hiragana index via GetOrder.
class JapaneseContextAnalysis {
 public:
  JapaneseContextAnalysis() { Reset(); }

  // Analyses a raw byte buffer (defined out of line).
  void HandleData(const char* aBuf, uint32_t aLen);

  // Analyses a single, already delimited character of aCharLen bytes that
  // starts at aStr.
  void HandleOneChar(const char* aStr, uint32_t aCharLen) {
    // Stop for good once enough pairs have been collected.
    if (mTotalRel > MAX_REL_THRESHOLD) {
      mDone = true;
    }
    if (mDone) {
      return;
    }

    // Only two-byte characters can be hiragana; everything else breaks the
    // pair chain.
    int32_t order = -1;
    if (aCharLen == 2) {
      order = GetOrder(aStr);
    }

    const bool havePair = (order != -1) && (mLastCharOrder != -1);
    if (havePair) {
      ++mTotalRel;
      // Credit the pair to the category the table assigns to it.
      ++mRelSample[jp2CharContext[mLastCharOrder][order]];
    }
    mLastCharOrder = order;
  }

  float GetConfidence(void);
  void Reset();

  // True once more than ENOUGH_REL_THRESHOLD pairs have been counted.
  bool GotEnoughData() { return mTotalRel > ENOUGH_REL_THRESHOLD; }

 protected:
  // Returns the hiragana index of the character at str (or -1) and stores
  // the character's byte length in *charLen.
  virtual int32_t GetOrder(const char* str, uint32_t* charLen) = 0;
  // Returns the hiragana index of the two-byte character at str, or -1.
  virtual int32_t GetOrder(const char* str) = 0;

  // Pair counters, one per category.
  uint32_t mRelSample[NUM_OF_CATEGORY];

  // Total number of pairs counted.
  uint32_t mTotalRel;

  // GetConfidence reports a value only when more pairs than this were seen.
  uint32_t mDataThreshold;

  // Hiragana index of the previous character, -1 when there is none.
  int32_t mLastCharOrder;

  // When a buffer ends in the middle of a character, the number of bytes to
  // skip at the start of the next buffer.
  uint32_t mNeedToSkipCharNum;

  // Set once analysis is finished; further input is ignored.
  bool mDone;
};
// Context analyser for Shift_JIS input.
class SJISContextAnalysis : public JapaneseContextAnalysis {
 protected:
  int32_t GetOrder(const char* str, uint32_t* charLen) override;

  // Hiragana index of the two-byte character at str, or -1. Hiragana are the
  // characters whose first byte is 0x82 ('\202') and whose second byte lies
  // in 0x9f-0xf1.
  int32_t GetOrder(const char* str) override {
    if ((unsigned char)*str != 0x82) {
      return -1;
    }
    const unsigned char trail = (unsigned char)*(str + 1);
    const bool isHiragana = trail >= 0x9f && trail <= 0xf1;
    return isHiragana ? trail - 0x9f : -1;
  }
};
// Context analyser for EUC-JP input.
class EUCJPContextAnalysis : public JapaneseContextAnalysis {
 protected:
  int32_t GetOrder(const char* str, uint32_t* charLen) override;

  // Hiragana index of the two-byte character at str, or -1. Hiragana are the
  // characters whose first byte is 0xa4 ('\244') and whose second byte lies
  // in 0xa1-0xf3.
  int32_t GetOrder(const char* str) override {
    if ((unsigned char)*str != 0xa4) {
      return -1;
    }
    const unsigned char trail = (unsigned char)*(str + 1);
    const bool isHiragana = trail >= 0xa1 && trail <= 0xf3;
    return isHiragana ? trail - 0xa1 : -1;
  }
};
#endif /* __JPCNTX_H__ */

Просмотреть файл

@ -1,21 +0,0 @@
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# C++ sources of the charset detector built from this directory: the
# analysers, the individual probers, their state-machine tables and the
# top-level detector.
UNIFIED_SOURCES += [
'CharDistribution.cpp',
'JpCntx.cpp',
'nsCharSetProber.cpp',
'nsEscCharsetProber.cpp',
'nsEscSM.cpp',
'nsEUCJPProber.cpp',
'nsMBCSGroupProber.cpp',
'nsMBCSSM.cpp',
'nsSJISProber.cpp',
'nsUniversalDetector.cpp',
'nsUTF8Prober.cpp',
]
# The objects built here are linked into the 'xul' library.
FINAL_LIBRARY = 'xul'

Просмотреть файл

@ -1,88 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsCharSetProber.h"
// Filter for scripts that do not use English letters.
//
// Splits aBuf[0..aLen) at every ASCII byte that is not a letter and copies
// only those segments that contain at least one byte with the high bit set;
// each copied segment that was ended by a delimiter is followed by one space.
//
// *newBuf receives a malloc()ed buffer of aLen bytes that the caller must
// free(); newLen receives the number of bytes written. Returns false (with
// *newBuf null) when the allocation fails.
bool nsCharSetProber::FilterWithoutEnglishLetters(const char* aBuf,
                                                  uint32_t aLen, char** newBuf,
                                                  uint32_t& newLen) {
  char* out = (char*)malloc(aLen);
  *newBuf = out;
  if (!out) {
    return false;
  }

  const char* const end = aBuf + aLen;
  const char* segStart = aBuf;
  const char* cur = aBuf;
  bool sawHighBit = false;

  while (cur < end) {
    const char c = *cur;
    if (c & 0x80) {
      sawHighBit = true;
    } else {
      const bool isLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
      if (!isLetter) {
        // A symbol, most likely punctuation: treat it as a segment
        // delimiter.
        if (sawHighBit && cur > segStart) {
          // The segment is more than a lone symbol and contains upper-ASCII
          // bytes, so keep it.
          while (segStart < cur) {
            *out++ = *segStart++;
          }
          ++segStart;  // step over the delimiter itself
          *out++ = ' ';
          sawHighBit = false;
        } else {
          // Drop the segment: it is just a symbol or a plain English word.
          segStart = cur + 1;
        }
      }
    }
    ++cur;
  }

  // Flush the trailing segment if it qualifies.
  if (sawHighBit && cur > segStart) {
    while (segStart < cur) {
      *out++ = *segStart++;
    }
  }

  newLen = out - *newBuf;
  return true;
}
// Filter for scripts that contain both English letters and upper-ASCII
// bytes; it reduces the amount of data handed to the probers.
//
// Splits aBuf[0..aLen) at every ASCII byte that is not a letter and copies
// the non-empty segments that are not inside a '<' ... '>' tag; each copied
// segment that was ended by a delimiter is followed by one space.
//
// *newBuf receives a malloc()ed buffer of aLen bytes that the caller must
// free(); newLen receives the number of bytes written. Returns false (with
// *newBuf null) when the allocation fails.
bool nsCharSetProber::FilterWithEnglishLetters(const char* aBuf, uint32_t aLen,
                                               char** newBuf,
                                               uint32_t& newLen) {
  char* out = (char*)malloc(aLen);
  *newBuf = out;
  if (!out) {
    return false;
  }

  const char* const end = aBuf + aLen;
  const char* segStart = aBuf;
  const char* cur = aBuf;
  bool insideTag = false;

  for (; cur < end; ++cur) {
    const char c = *cur;

    // Track tag boundaries before deciding what to do with this byte.
    if (c == '>') {
      insideTag = false;
    } else if (c == '<') {
      insideTag = true;
    }

    const bool isLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    const bool isDelimiter = !(c & 0x80) && !isLetter;
    if (!isDelimiter) {
      continue;
    }

    if (cur > segStart && !insideTag) {
      // The segment is more than a lone symbol and is not inside a tag, so
      // keep it.
      while (segStart < cur) {
        *out++ = *segStart++;
      }
      ++segStart;  // step over the delimiter itself
      *out++ = ' ';
    } else {
      segStart = cur + 1;
    }
  }

  // Flush the trailing segment unless the input ended inside a tag.
  if (!insideTag) {
    while (segStart < cur) {
      *out++ = *segStart++;
    }
  }

  newLen = out - *newBuf;
  return true;
}

Просмотреть файл

@ -1,44 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsCharSetProber_h__
#define nsCharSetProber_h__
#include "nscore.h"
//#define DEBUG_chardet // Uncomment this for debug dump.
// Result of feeding data to a prober.
enum nsProbingState {
  // Still detecting: no sure answer yet, but the caller can ask for a
  // confidence value.
  eDetecting = 0,
  // Positive answer.
  eFoundIt = 1,
  // Negative answer.
  eNotMe = 2
};
#define SHORTCUT_THRESHOLD (float)0.95
// Abstract interface implemented by every charset prober.
class nsCharSetProber {
 public:
  virtual ~nsCharSetProber() {}

  // Name of the charset this prober stands for.
  virtual const char* GetCharSetName() = 0;

  // Feeds aLen bytes starting at aBuf to the prober and returns its state.
  virtual nsProbingState HandleData(const char* aBuf, uint32_t aLen) = 0;

  // Current probing state.
  virtual nsProbingState GetState() = 0;

  // Returns the prober to its initial state.
  virtual void Reset() = 0;

  // Confidence in the current guess.
  virtual float GetConfidence() = 0;

#ifdef DEBUG_chardet
  virtual void DumpStatus() {}
#endif

  // Helper functions used in the Latin1 and Group probers.
  // Both allocate a new buffer for newBuf, which the caller should release
  // with free(), and both return false when the allocation fails.
  static bool FilterWithoutEnglishLetters(const char* aBuf, uint32_t aLen,
                                          char** newBuf, uint32_t& newLen);
  static bool FilterWithEnglishLetters(const char* aBuf, uint32_t aLen,
                                       char** newBuf, uint32_t& newLen);
};
#endif /* nsCharSetProber_h__ */

Просмотреть файл

@ -1,85 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsCodingStateMachine_h__
#define nsCodingStateMachine_h__
#include "mozilla/ArrayUtils.h"
#include "nsPkgInt.h"
/* Apart from these 3 generic states, machine states are specific to
* each charset prober.
*/
#define eStart 0
#define eError 1
#define eItsMe 2
#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
// State machine model: the constant data that drives one
// nsCodingStateMachine. Instances are brace-initialised in field order, so
// the order of the members below must not change.
typedef struct {
// Packed table giving the class of each byte value (read through GETCLASS).
nsPkgInt classTable;
// Row width of stateTable: entries are addressed as
// state * classFactor + byte class.
uint32_t classFactor;
// Packed table giving the next state for each (state, byte class) pair.
nsPkgInt stateTable;
// Character length in bytes, indexed by the class of the first byte.
const uint32_t* charLenTable;
#ifdef DEBUG
// Number of entries in charLenTable; only used for a debug bounds assertion.
const size_t charLenTableLength;
#endif
// Name reported by nsCodingStateMachine::GetCodingStateMachine().
const char* name;
} SMModel;
// Table-driven state machine that validates a byte stream against one
// encoding, described by an SMModel.
class nsCodingStateMachine {
 public:
  // sm must outlive this object; it is not copied or owned.
  //
  // All members are initialised here so that GetCurrentCharLen() returns a
  // defined value (0) even when it is called before the first NextState().
  explicit nsCodingStateMachine(const SMModel* sm)
      : mCurrentState(eStart),
        mCurrentCharLen(0),
        mCurrentBytePos(0),
        mModel(sm) {}

  // Consumes one byte and returns the resulting state (eStart, eError,
  // eItsMe or a model-specific intermediate state).
  uint32_t NextState(char c) {
    // For each byte we get its class; if it is the first byte of a
    // character we also get the character's byte length.
    uint32_t byteCls = GETCLASS(c);
    if (mCurrentState == eStart) {
      mCurrentBytePos = 0;
      MOZ_ASSERT(byteCls < mModel->charLenTableLength);
      mCurrentCharLen = mModel->charLenTable[byteCls];
    }
    // From the byte's class and the state table we get the next state.
    mCurrentState = GETFROMPCK(mCurrentState * mModel->classFactor + byteCls,
                               mModel->stateTable);
    mCurrentBytePos++;
    return mCurrentState;
  }

  // Byte length of the character whose first byte was seen most recently.
  uint32_t GetCurrentCharLen(void) { return mCurrentCharLen; }

  // Returns the machine to eStart.
  void Reset(void) { mCurrentState = eStart; }

  // Name stored in the model.
  const char* GetCodingStateMachine() { return mModel->name; }

 protected:
  uint32_t mCurrentState;
  uint32_t mCurrentCharLen;
  uint32_t mCurrentBytePos;

  const SMModel* mModel;
};
extern const SMModel UTF8SMModel;
extern const SMModel Big5SMModel;
extern const SMModel EUCJPSMModel;
extern const SMModel EUCKRSMModel;
extern const SMModel EUCTWSMModel;
extern const SMModel GB18030SMModel;
extern const SMModel SJISSMModel;
extern const SMModel HZSMModel;
extern const SMModel ISO2022CNSMModel;
extern const SMModel ISO2022JPSMModel;
extern const SMModel ISO2022KRSMModel;
#undef CHAR_LEN_TABLE
#ifdef DEBUG
# define CHAR_LEN_TABLE(x) x, mozilla::ArrayLength(x)
#else
# define CHAR_LEN_TABLE(x) x
#endif
#endif /* nsCodingStateMachine_h__ */

Просмотреть файл

@ -1,60 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// For Japanese encodings, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often occur in groups
// 3. certain combinations of kana are never used in the Japanese language
#include "nsEUCJPProber.h"
#include "nsDebug.h"
// Returns the prober to its initial state: detecting again, with the state
// machine and both analysers reset.
void nsEUCJPProber::Reset(void) {
  mState = eDetecting;
  mCodingSM->Reset();
  mDistributionAnalyser.Reset();
  mContextAnalyser.Reset();
}
// Feeds aLen bytes starting at aBuf through the EUC-JP state machine and
// hands every completed character to the context and distribution analysers.
//
// Returns the prober state afterwards: eFoundIt as soon as the state machine
// reports eItsMe, or once the context analyser has enough data and the
// confidence exceeds SHORTCUT_THRESHOLD; otherwise the state is unchanged.
// An empty buffer is a caller error and leaves the prober untouched.
nsProbingState nsEUCJPProber::HandleData(const char* aBuf, uint32_t aLen) {
  NS_ASSERTION(aLen, "HandleData called with empty buffer");
  // The assertion above does not stop execution. Without this guard,
  // aBuf[aLen - 1] below would index with a wrapped-around uint32_t.
  if (aLen == 0) {
    return mState;
  }

  for (uint32_t i = 0; i < aLen; i++) {
    const uint32_t codingState = mCodingSM->NextState(aBuf[i]);
    if (codingState == eItsMe) {
      mState = eFoundIt;
      break;
    }
    if (codingState != eStart) {
      continue;
    }

    // Back in eStart: aBuf[i] was the last byte of a character.
    const uint32_t charLen = mCodingSM->GetCurrentCharLen();
    const char* charBytes;
    if (i == 0) {
      // The byte before this one belongs to the previous buffer; it was
      // saved in mLastChar[0], so complete the pair in mLastChar.
      mLastChar[1] = aBuf[0];
      charBytes = mLastChar;
    } else {
      charBytes = aBuf + i - 1;
    }
    mContextAnalyser.HandleOneChar(charBytes, charLen);
    mDistributionAnalyser.HandleOneChar(charBytes, charLen);
  }

  // Remember the final byte in case a character straddles this buffer and
  // the next one.
  mLastChar[0] = aBuf[aLen - 1];

  if (mState == eDetecting && mContextAnalyser.GotEnoughData() &&
      GetConfidence() > SHORTCUT_THRESHOLD) {
    mState = eFoundIt;
  }
  return mState;
}
// Returns the context analyser's confidence when it is strictly greater than
// the distribution analyser's, otherwise the distribution analyser's.
float nsEUCJPProber::GetConfidence(void) {
  const float fromContext = mContextAnalyser.GetConfidence();
  const float fromDistribution = mDistributionAnalyser.GetConfidence();
  if (fromContext > fromDistribution) {
    return fromContext;
  }
  return fromDistribution;
}

Просмотреть файл

@ -1,42 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// For Japanese encodings, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often occur in groups
// 3. certain combinations of kana are never used in the Japanese language
#ifndef nsEUCJPProber_h__
#define nsEUCJPProber_h__
#include "nsCharSetProber.h"
#include "nsCodingStateMachine.h"
#include "JpCntx.h"
#include "CharDistribution.h"
class nsEUCJPProber : public nsCharSetProber {
public:
nsEUCJPProber() {
mCodingSM = new nsCodingStateMachine(&EUCJPSMModel);
Reset();
}
virtual ~nsEUCJPProber(void) { delete mCodingSM; }
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
const char* GetCharSetName() override { return "EUC-JP"; }
nsProbingState GetState(void) override { return mState; }
void Reset(void) override;
float GetConfidence(void) override;
protected:
nsCodingStateMachine* mCodingSM;
nsProbingState mState;
EUCJPContextAnalysis mContextAnalyser;
EUCJPDistributionAnalysis mDistributionAnalyser;
char mLastChar[2];
};
#endif /* nsEUCJPProber_h__ */

Просмотреть файл

@ -1,37 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsEscCharsetProber.h"
#include "nsUniversalDetector.h"
// Creates a prober in the detecting state, with no charset detected yet and
// a state machine driven by ISO2022JPSMModel.
nsEscCharSetProber::nsEscCharSetProber()
    : mState(eDetecting), mDetectedCharset(nullptr) {
  mCodingSM = new nsCodingStateMachine(&ISO2022JPSMModel);
}
// Nothing to do explicitly; the members clean up after themselves.
nsEscCharSetProber::~nsEscCharSetProber() = default;
// Returns the prober to its initial state: detecting, nothing detected, and
// the state machine back at its start state.
void nsEscCharSetProber::Reset(void) {
  mDetectedCharset = nullptr;
  mCodingSM->Reset();
  mState = eDetecting;
}
// Feeds aLen bytes starting at aBuf through the state machine. As soon as
// the machine reports eItsMe, the prober switches to eFoundIt and records
// the model's name as the detected charset. Input is ignored unless the
// prober is still in the detecting state.
nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, uint32_t aLen) {
  if (mState != eDetecting) {
    return mState;
  }

  const char* const end = aBuf + aLen;
  for (const char* p = aBuf; p != end; ++p) {
    if (mCodingSM->NextState(*p) != eItsMe) {
      continue;
    }
    mState = eFoundIt;
    mDetectedCharset = mCodingSM->GetCodingStateMachine();
    break;
  }
  return mState;
}

Просмотреть файл

@ -1,31 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsEscCharSetProber_h__
#define nsEscCharSetProber_h__
#include "nsCharSetProber.h"
#include "nsCodingStateMachine.h"
#include "nsAutoPtr.h"
// Prober for escape-sequence based encodings; it is driven by a single
// coding state machine and reports the name of the model that matched.
class nsEscCharSetProber : public nsCharSetProber {
 public:
  nsEscCharSetProber();
  virtual ~nsEscCharSetProber();

  nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
  // Null until a charset has been detected.
  const char* GetCharSetName() override { return mDetectedCharset; }
  nsProbingState GetState() override { return mState; }
  void Reset() override;
  // Fixed confidence value.
  float GetConfidence() override { return static_cast<float>(0.99); }

 protected:
  void GetDistribution(uint32_t aCharLen, const char* aStr);

  nsAutoPtr<nsCodingStateMachine> mCodingSM;
  nsProbingState mState;
  const char* mDetectedCharset;
};
#endif /* nsEscCharSetProber_h__ */

Просмотреть файл

@ -1,70 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsCodingStateMachine.h"
// Byte-class table for the ISO-2022-JP state machine: one class per byte
// value 0x00-0xff, eight classes per PCK4BITS word. The trailing comment on
// each row gives the byte range that row covers.
//
// Reading the rows: ESC (0x1b) is class 1; '$' (0x24) is 7; '(' (0x28) is 3;
// '@' (0x40) is 6; 'B' (0x42) is 4; 'D' (0x44) is 8; 'I' (0x49) is 9;
// 'J' (0x4a) is 5. 0x00, 0x0e, 0x0f and every byte >= 0x80 are class 2; all
// remaining bytes are class 0.
static const uint32_t ISO2022JP_cls[256 / 8] = {
PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
PCK4BITS(0, 0, 0, 0, 0, 0, 2, 2), // 08 - 0f
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
PCK4BITS(0, 0, 0, 0, 7, 0, 0, 0), // 20 - 27
PCK4BITS(3, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
PCK4BITS(6, 0, 4, 0, 8, 0, 0, 0), // 40 - 47
PCK4BITS(0, 9, 5, 0, 0, 0, 0, 0), // 48 - 4f
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff
};
// State-transition table for the ISO-2022-JP state machine, stored as a
// flat, packed array. nsCodingStateMachine::NextState reads entry
// state * classFactor + byte class, and the model below sets classFactor to
// 10, so each state owns 10 consecutive entries. The trailing comment on
// each row gives the (hexadecimal) range of flat entry indices it holds.
// Numeric entries (3, 4, 5, 6) are model-specific intermediate states.
static const uint32_t ISO2022JP_st[9] = {
PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart,
eStart), // 00-07
PCK4BITS(eStart, eStart, eError, eError, eError, eError, eError,
eError), // 08-0f
PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe,
eItsMe), // 10-17
PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError,
eError), // 18-1f
PCK4BITS(eError, 5, eError, eError, eError, 4, eError, eError), // 20-27
PCK4BITS(eError, eError, eError, 6, eItsMe, eError, eItsMe,
eError), // 28-2f
PCK4BITS(eError, eError, eError, eError, eError, eError, eItsMe,
eItsMe), // 30-37
PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError,
eError), // 38-3f
PCK4BITS(eError, eError, eError, eError, eItsMe, eError, eStart,
eStart) // 40-47
};
// Character length per byte class (classes 0-9). Every entry is 0: this
// model does not report character lengths.
static const uint32_t ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Model for ISO-2022-JP. Fields, in SMModel order: packed class table,
// class factor (10 classes per state row), packed state table, char length
// table (plus its length in DEBUG builds, via CHAR_LEN_TABLE), and name.
const SMModel ISO2022JPSMModel = {
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls},
10,
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st},
CHAR_LEN_TABLE(ISO2022JPCharLenTable),
"ISO-2022-JP",
};

Просмотреть файл

@ -1,149 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <stdio.h>
#include "nsMBCSGroupProber.h"
#include "nsUniversalDetector.h"
#if defined(DEBUG_chardet) || defined(DEBUG_jgmyers)
// Display names used by the debug dump code, indexed by prober slot. The
// order matches the slots filled in the nsMBCSGroupProber constructor.
const char* ProberName[] = {
"UTF8",
"SJIS",
"EUCJP",
};
#endif
// Creates the three member probers (UTF-8, Shift_JIS, EUC-JP, in that slot
// order) and puts the group into its initial state.
nsMBCSGroupProber::nsMBCSGroupProber() {
  nsCharSetProber* const created[NUM_OF_PROBERS] = {
      new nsUTF8Prober(),
      new nsSJISProber(),
      new nsEUCJPProber(),
  };
  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot) {
    mProbers[slot] = created[slot];
  }
  Reset();
}
// Destroys every member prober.
nsMBCSGroupProber::~nsMBCSGroupProber() {
  for (nsCharSetProber* prober : mProbers) {
    delete prober;
  }
}
// Returns the charset name of the best-guess prober. When no best guess has
// been recorded yet, GetConfidence() is run to pick one; if that still
// yields none, slot 0 is used.
const char* nsMBCSGroupProber::GetCharSetName() {
  if (mBestGuess == -1) {
    GetConfidence();  // records the best guess as a side effect
  }
  if (mBestGuess == -1) {
    mBestGuess = 0;
  }
  return mProbers[mBestGuess]->GetCharSetName();
}
void nsMBCSGroupProber::Reset(void) {
mActiveNum = 0;
for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) {
if (mProbers[i]) {
mProbers[i]->Reset();
mIsActive[i] = true;
++mActiveNum;
} else
mIsActive[i] = false;
}
mBestGuess = -1;
mState = eDetecting;
mKeepNext = 0;
}
// Filters aBuf down to the runs that contain bytes with the high bit set and
// forwards those runs to every active prober, to reduce the probers' load.
//
// A run starts at the first high-bit byte and ends after the second
// consecutive byte without the high bit; a run still open at the end of the
// buffer is forwarded as well and its countdown is carried into the next
// call through mKeepNext. Returns eFoundIt (recording the prober's slot as
// the best guess) as soon as any prober reports it, otherwise the current
// state.
nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen) {
  // Forwards one run to all active probers; true when one of them found it.
  auto feedActiveProbers = [this](const char* aRun, uint32_t aRunLen) -> bool {
    for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot) {
      if (!mIsActive[slot]) {
        continue;
      }
      if (mProbers[slot]->HandleData(aRun, aRunLen) == eFoundIt) {
        mBestGuess = slot;
        mState = eFoundIt;
        return true;
      }
    }
    return false;
  };

  uint32_t runStart = 0;
  uint32_t pending = mKeepNext;

  for (uint32_t pos = 0; pos < aLen; ++pos) {
    if (aBuf[pos] & 0x80) {
      if (!pending) {
        runStart = pos;
      }
      pending = 2;
      continue;
    }
    if (!pending) {
      continue;
    }
    if (--pending != 0) {
      continue;
    }
    // The run is complete, including the current byte.
    if (feedActiveProbers(aBuf + runStart, pos + 1 - runStart)) {
      return mState;
    }
  }

  // A run is still open at the end of the buffer: hand over what we have.
  if (pending && feedActiveProbers(aBuf + runStart, aLen - runStart)) {
    return mState;
  }

  mKeepNext = pending;
  return mState;
}
// Returns 0.99 once a prober has found its charset and 0.01 when the group
// state is eNotMe. Otherwise returns the highest confidence among the active
// probers (0 when none is positive) and records that prober's slot as the
// best guess.
float nsMBCSGroupProber::GetConfidence(void) {
  if (mState == eFoundIt) {
    return (float)0.99;
  }
  if (mState == eNotMe) {
    return (float)0.01;
  }

  float best = 0.0;
  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot) {
    if (!mIsActive[slot]) {
      continue;
    }
    const float candidate = mProbers[slot]->GetConfidence();
    if (best < candidate) {
      best = candidate;
      mBestGuess = slot;
    }
  }
  return best;
}
#ifdef DEBUG_chardet
// Debug helper: prints one line per prober slot with either its current
// confidence or a note that it is inactive.
void nsMBCSGroupProber::DumpStatus() {
  GetConfidence();  // refreshes the best guess before dumping

  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot) {
    if (mIsActive[slot]) {
      const float confidence = mProbers[slot]->GetConfidence();
      printf(" MBCS %1.3f: [%s]\r\n", confidence, ProberName[slot]);
      continue;
    }
    printf(" MBCS inactive: [%s] (confidence is too low).\r\n",
           ProberName[slot]);
  }
}
#endif
#ifdef DEBUG_jgmyers
// Debug helper: appends one entry per prober slot to states, starting at
// index offset, and advances offset past the entries written. Inactive
// probers are reported with a confidence of 0.
void nsMBCSGroupProber::GetDetectorState(
    nsUniversalDetector::DetectorState (
        &states)[nsUniversalDetector::NumDetectors],
    uint32_t& offset) {
  uint32_t slot = 0;
  while (slot < NUM_OF_PROBERS) {
    nsUniversalDetector::DetectorState& entry = states[offset];
    entry.name = ProberName[slot];
    entry.isActive = mIsActive[slot];
    entry.confidence = mIsActive[slot] ? mProbers[slot]->GetConfidence() : 0.0;
    ++offset;
    ++slot;
  }
}
#endif /* DEBUG_jgmyers */

Просмотреть файл

@ -1,43 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsMBCSGroupProber_h__
#define nsMBCSGroupProber_h__
#include "nsSJISProber.h"
#include "nsUTF8Prober.h"
#include "nsEUCJPProber.h"
#define NUM_OF_PROBERS 3
// Prober that owns NUM_OF_PROBERS multi-byte charset probers, feeds them the
// interesting parts of the input and reports the most confident one.
class nsMBCSGroupProber : public nsCharSetProber {
 public:
  nsMBCSGroupProber();
  virtual ~nsMBCSGroupProber();

  nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
  const char* GetCharSetName() override;
  nsProbingState GetState() override { return mState; }
  void Reset() override;
  float GetConfidence() override;

#ifdef DEBUG_chardet
  void DumpStatus();
#endif
#ifdef DEBUG_jgmyers
  void GetDetectorState(nsUniversalDetector::DetectorState (
                            &states)[nsUniversalDetector::NumDetectors],
                        uint32_t& offset);
#endif

 protected:
  nsProbingState mState;
  // Owned member probers, one per slot.
  nsCharSetProber* mProbers[NUM_OF_PROBERS];
  // Whether the prober in the same slot still receives data.
  bool mIsActive[NUM_OF_PROBERS];
  // Slot of the most confident prober, -1 while unknown.
  int32_t mBestGuess;
  // Number of probers marked active by Reset().
  uint32_t mActiveNum;
  // Countdown of low-ASCII bytes still belonging to the current run, carried
  // over between HandleData() calls.
  uint32_t mKeepNext;
};
#endif /* nsMBCSGroupProber_h__ */

Просмотреть файл

@ -1,200 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsCodingStateMachine.h"
/*
Modification from Frank Tang's original work:
. 0x00 is allowed as a legal character, since some web pages contain this
char in their text stream.
*/
// Byte-class table for the EUC-JP state machine: one class per byte value
// 0x00-0xff, eight classes per PCK4BITS word. The trailing comment on each
// row gives the byte range that row covers.
//
// Reading the rows: most bytes below 0x80 are class 4; 0x0e, 0x0f, 0x1b,
// 0x80-0x8d, 0x90-0xa0 and 0xff are class 5; 0x8e is class 1; 0x8f is
// class 3; 0xa1-0xdf are class 2; 0xe0-0xfe are class 0.
static const uint32_t EUCJP_cls[256 / 8] = {
// Row as it was before 0x00 was made a legal character (0x00 was class 5):
// PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 00 - 07
PCK4BITS(4, 4, 4, 4, 4, 4, 5, 5), // 08 - 0f
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 10 - 17
PCK4BITS(4, 4, 4, 5, 4, 4, 4, 4), // 18 - 1f
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 20 - 27
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 28 - 2f
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 30 - 37
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 38 - 3f
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 40 - 47
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 48 - 4f
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 50 - 57
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 58 - 5f
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 60 - 67
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 68 - 6f
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 70 - 77
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 78 - 7f
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 80 - 87
PCK4BITS(5, 5, 5, 5, 5, 5, 1, 3), // 88 - 8f
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 90 - 97
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 98 - 9f
PCK4BITS(5, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e0 - e7
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e8 - ef
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // f0 - f7
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 5) // f8 - ff
};
// State-transition table for the EUC-JP state machine, stored as a flat,
// packed array. nsCodingStateMachine::NextState reads entry
// state * classFactor + byte class, and the model below sets classFactor to
// 6, so each state owns 6 consecutive entries. The trailing comment on each
// row gives the (hexadecimal) range of flat entry indices it holds. Numeric
// entries (3, 4, 5) are model-specific intermediate states.
static const uint32_t EUCJP_st[5] = {
PCK4BITS(3, 4, 3, 5, eStart, eError, eError, eError), // 00-07
PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe,
eItsMe), // 08-0f
PCK4BITS(eItsMe, eItsMe, eStart, eError, eStart, eError, eError,
eError), // 10-17
PCK4BITS(eError, eError, eStart, eError, eError, eError, 3,
eError), // 18-1f
PCK4BITS(3, eError, eError, eError, eStart, eStart, eStart,
eStart) // 20-27
};
// Character length in bytes, indexed by the class of the first byte
// (classes 0-5).
static const uint32_t EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
// Model for EUC-JP. Fields, in SMModel order: packed class table, class
// factor (6 classes per state row), packed state table, char length table
// (plus its length in DEBUG builds, via CHAR_LEN_TABLE), and name.
const SMModel EUCJPSMModel = {
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls},
6,
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st},
CHAR_LEN_TABLE(EUCJPCharLenTable),
"EUC-JP",
};
// Shift_JIS
//
// Byte-class table for the Shift_JIS state machine: one class per byte value
// 0x00-0xff, eight classes per PCK4BITS word. The trailing comment on each
// row gives the byte range that row covers.
//
// Reading the rows: 0x0e, 0x0f, 0x1b and 0xfd-0xff are class 0; the other
// bytes below 0x40, and 0x7f, are class 1; 0x40-0x7e and 0xa0-0xdf are
// class 2; 0x81-0x9f (plus 0x80) and 0xe0-0xec are class 3; 0xed-0xfc are
// class 4.
static const uint32_t SJIS_cls[256 / 8] = {
// Row as it was before 0x00 was made a legal character (0x00 was class 0):
// PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 40 - 47
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 48 - 4f
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 50 - 57
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 58 - 5f
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 60 - 67
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 68 - 6f
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 70 - 77
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 1), // 78 - 7f
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 80 - 87
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 88 - 8f
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f
// 0xa0 is illegal in the Shift_JIS encoding, but some pages do contain
// this byte, so we need to be more forgiving of errors here.
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // e0 - e7
PCK4BITS(3, 3, 3, 3, 3, 4, 4, 4), // e8 - ef
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // f0 - f7
PCK4BITS(4, 4, 4, 4, 4, 0, 0, 0) // f8 - ff
};
// State-transition table for the Shift_JIS state machine, stored as a flat,
// packed array. nsCodingStateMachine::NextState reads entry
// state * classFactor + byte class, and the model below sets classFactor to
// 6, so each state owns 6 consecutive entries. The trailing comment on each
// row gives the (hexadecimal) range of flat entry indices it holds. The
// numeric entry 3 is a model-specific intermediate state.
static const uint32_t SJIS_st[3] = {
PCK4BITS(eError, eStart, eStart, 3, eError, eError, eError,
eError), // 00-07
PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe,
eItsMe), // 08-0f
PCK4BITS(eItsMe, eItsMe, eError, eError, eStart, eStart, eStart,
eStart) // 10-17
};
// Character length in bytes, indexed by the class of the first byte.
static const uint32_t SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
// Model for Shift_JIS. Fields, in SMModel order: packed class table, class
// factor (6 classes per state row), packed state table, char length table
// (plus its length in DEBUG builds, via CHAR_LEN_TABLE), and name.
const SMModel SJISSMModel = {
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls},
6,
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st},
CHAR_LEN_TABLE(SJISCharLenTable),
"Shift_JIS",
};
// Byte-class table for the UTF-8 state machine: one class per byte value
// 0x00-0xff, eight classes per PCK4BITS word. The trailing comment on each
// row gives the byte range that row covers.
//
// Reading the rows: bytes below 0x80 are class 1, except 0x0e, 0x0f and 0x1b
// which are class 0; 0x80-0x8f are class 2, 0x90-0x9f class 3 and 0xa0-0xbf
// class 4; 0xc0, 0xc1 and 0xf5-0xff are class 0; 0xc2-0xdf are class 5;
// 0xe0 is class 6; 0xe1-0xec, 0xee and 0xef are class 7; 0xed is class 8;
// 0xf0 is class 9; 0xf1-0xf3 are class 10; 0xf4 is class 11.
static const uint32_t UTF8_cls[256 / 8] = {
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 40 - 47
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 48 - 4f
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 50 - 57
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 58 - 5f
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 60 - 67
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 68 - 6f
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 70 - 77
PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 78 - 7f
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97
PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // a0 - a7
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // a8 - af
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // b0 - b7
PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // b8 - bf
PCK4BITS(0, 0, 5, 5, 5, 5, 5, 5), // c0 - c7
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // c8 - cf
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // d0 - d7
PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // d8 - df
PCK4BITS(6, 7, 7, 7, 7, 7, 7, 7), // e0 - e7
PCK4BITS(7, 7, 7, 7, 7, 8, 7, 7), // e8 - ef
PCK4BITS(9, 10, 10, 10, 11, 0, 0, 0), // f0 - f7
PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0) // f8 - ff
};
// Packed transition table for the UTF-8 state machine model. Each entry is a
// 4-bit value - eStart, eError, eItsMe or a numbered intermediate state - and
// PCK4BITS packs eight entries per word, giving 15 words (120 entries). The
// trailing comment on each row is the hexadecimal range of entry indices it
// holds.
// NOTE(review): presumably indexed by (current state * number of classes +
// byte class) inside nsCodingStateMachine; the lookup code is not visible
// here - confirm before editing any entry.
static const uint32_t UTF8_st[15] = {
PCK4BITS(eError, eStart, eError, eError, eError, 3, 4, 5), // 00 - 07
PCK4BITS(6, 7, 8, 9, eError, eError, eError, eError), // 08 - 0f
PCK4BITS(eError, eError, eError, eError, eError, eError, eError,
eError), // 10 - 17
PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,
eItsMe), // 18 - 1f
PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart,
eStart), // 20 - 27
PCK4BITS(eStart, eError, eError, eError, eError, eError, eError,
eError), // 28 - 2f
PCK4BITS(eError, eError, eError, eError, 3, eError, eError,
eError), // 30 - 37
PCK4BITS(eError, eError, eError, eError, eError, eError, 3, 3), // 38 - 3f
PCK4BITS(3, eError, eError, eError, eError, eError, eError,
eError), // 40 - 47
PCK4BITS(eError, eError, 3, 3, eError, eError, eError, eError), // 48 - 4f
PCK4BITS(eError, eError, eError, eError, eError, eError, 5, 5), // 50 - 57
PCK4BITS(eError, eError, eError, eError, eError, eError, eError,
eError), // 58 - 5f
PCK4BITS(eError, eError, 5, 5, 5, eError, eError, eError), // 60 - 67
PCK4BITS(eError, eError, eError, eError, eError, eError, 5,
eError), // 68 - 6f
PCK4BITS(eError, eError, eError, eError, eError, eError, eError,
eError) // 70 - 77
};
// Length value associated with each of the twelve UTF-8 byte classes assigned
// by UTF8_cls; the table is indexed by class number (class 1 -> 1, class 5 ->
// 2, classes 6-8 -> 3, classes 9-11 -> 4, all others -> 0).
static const uint32_t UTF8CharLenTable[] = {0, 1, 0, 0, 0, 2, 3, 3, 3, 4, 4, 4};
// State machine model for UTF-8. It bundles the packed class table (UTF8_cls),
// the packed state table (UTF8_st), the per-class length table and the charset
// label. The literal 12 matches the number of entries in UTF8CharLenTable.
// NOTE(review): the field meanings are defined by SMModel, which is declared
// elsewhere - confirm there.
const SMModel UTF8SMModel = {
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls},
12,
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st},
CHAR_LEN_TABLE(UTF8CharLenTable),
"UTF-8",
};

Просмотреть файл

@ -1,43 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsPkgInt_h__
#define nsPkgInt_h__
#include "nscore.h"
// Parameters for reading fixed-width unsigned fields that are packed into an
// array of 32-bit words. Each enum has one enumerator per supported field
// width (4, 8 or 16 bits); GETFROMPCK below shows how the four values are
// combined.

// Right shift that turns a field index into a word index (8, 4 or 2 fields
// per 32-bit word).
typedef enum { eIdxSft4bits = 3, eIdxSft8bits = 2, eIdxSft16bits = 1 } nsIdxSft;
// Mask that keeps the field's position inside its word.
typedef enum { eSftMsk4bits = 7, eSftMsk8bits = 3, eSftMsk16bits = 1 } nsSftMsk;
// Left shift that turns the position inside the word into a bit offset
// (multiplies by 4, 8 or 16).
typedef enum { eBitSft4bits = 2, eBitSft8bits = 3, eBitSft16bits = 4 } nsBitSft;
// Mask that isolates a single field once it has been shifted down to bit 0.
typedef enum {
eUnitMsk4bits = 0x0000000FL,
eUnitMsk8bits = 0x000000FFL,
eUnitMsk16bits = 0x0000FFFFL
} nsUnitMsk;
// A packed table: the four access parameters for its field width plus a
// pointer to the packed words. The struct does not allocate or free `data`.
typedef struct nsPkgInt {
nsIdxSft idxsft;
nsSftMsk sftmsk;
nsBitSft bitsft;
nsUnitMsk unitmsk;
const uint32_t* const data;
} nsPkgInt;
// Packs two 16-bit values into one word: `a` in the low half, `b` in the high
// half.
#define PCK16BITS(a, b) ((uint32_t)(((b) << 16) | (a)))
// Packs four 8-bit values into one word: `a` is the lowest byte, `d` the
// highest.
#define PCK8BITS(a, b, c, d) \
PCK16BITS(((uint32_t)(((b) << 8) | (a))), ((uint32_t)(((d) << 8) | (c))))
// Packs eight 4-bit values into one word: `a` is the lowest nibble, `h` the
// highest.
#define PCK4BITS(a, b, c, d, e, f, g, h) \
PCK8BITS(((uint32_t)(((b) << 4) | (a))), ((uint32_t)(((d) << 4) | (c))), \
((uint32_t)(((f) << 4) | (e))), ((uint32_t)(((h) << 4) | (g))))
// Reads field number `i` from packed table `c` (an nsPkgInt): select the word
// with idxsft, shift the wanted field down to bit 0 using sftmsk and bitsft,
// then mask it with unitmsk. No bounds checking is performed on `i`.
#define GETFROMPCK(i, c) \
(((((c).data)[(i) >> (c).idxsft]) >> (((i) & (c).sftmsk) << (c).bitsft)) & \
(c).unitmsk)
#endif /* nsPkgInt_h__ */

Просмотреть файл

@ -1,59 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// For S-JIS encoding, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often exist in groups
// 3. certain combinations of kana are never used in the Japanese language
#include "nsSJISProber.h"
#include "nsDebug.h"
// Puts the prober back into its initial "still detecting" state and resets
// the coding state machine and both analysers it owns.
void nsSJISProber::Reset(void) {
  mState = eDetecting;
  mCodingSM->Reset();
  mContextAnalyser.Reset();
  mDistributionAnalyser.Reset();
}
// Feeds `aLen` bytes starting at `aBuf` through the Shift_JIS state machine
// and, for every completed character, through the context and distribution
// analysers.
//
// Returns the prober state after the buffer has been consumed: eFoundIt as
// soon as the state machine reports eItsMe, or when the context analyser has
// enough data and the combined confidence exceeds SHORTCUT_THRESHOLD;
// otherwise the state is left unchanged.
//
// An empty buffer is a caller error (asserted) and is otherwise a no-op.
nsProbingState nsSJISProber::HandleData(const char* aBuf, uint32_t aLen) {
  NS_ASSERTION(aLen, "HandleData called with empty buffer");
  if (aLen == 0) {
    // An assertion alone is not a guard: with aLen == 0 the trailing
    // `aBuf[aLen - 1]` read would index with a wrapped-around unsigned value.
    return mState;
  }

  for (uint32_t i = 0; i < aLen; i++) {
    const uint32_t codingState = mCodingSM->NextState(aBuf[i]);
    if (codingState == eItsMe) {
      mState = eFoundIt;
      break;
    }
    if (codingState == eStart) {
      // Back at the start state: a character has just been completed.
      const uint32_t charLen = mCodingSM->GetCurrentCharLen();
      if (i == 0) {
        // The character may have begun in the previous buffer. mLastChar[0]
        // holds that buffer's final byte, so place the current byte after it
        // and hand the analysers the stitched-together bytes.
        mLastChar[1] = aBuf[0];
        mContextAnalyser.HandleOneChar(mLastChar + 2 - charLen, charLen);
        mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
      } else {
        mContextAnalyser.HandleOneChar(aBuf + i + 1 - charLen, charLen);
        mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
      }
    }
  }

  // Remember the final byte so a character split across calls can be
  // reassembled on the next call.
  mLastChar[0] = aBuf[aLen - 1];

  if (mState == eDetecting && mContextAnalyser.GotEnoughData() &&
      GetConfidence() > SHORTCUT_THRESHOLD) {
    mState = eFoundIt;
  }
  return mState;
}
// Returns the larger of the context analyser's and the distribution
// analyser's confidence values.
float nsSJISProber::GetConfidence(void) {
  const float contextConfidence = mContextAnalyser.GetConfidence();
  const float distributionConfidence = mDistributionAnalyser.GetConfidence();
  if (contextConfidence > distributionConfidence) {
    return contextConfidence;
  }
  return distributionConfidence;
}

Просмотреть файл

@ -1,42 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// For S-JIS encoding, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often exist in groups
// 3. certain combinations of kana are never used in the Japanese language
#ifndef nsSJISProber_h__
#define nsSJISProber_h__
#include "nsCharSetProber.h"
#include "nsCodingStateMachine.h"
#include "JpCntx.h"
#include "CharDistribution.h"
class nsSJISProber : public nsCharSetProber {
public:
nsSJISProber() {
mCodingSM = new nsCodingStateMachine(&SJISSMModel);
Reset();
}
virtual ~nsSJISProber(void) { delete mCodingSM; }
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
const char* GetCharSetName() override { return "Shift_JIS"; }
nsProbingState GetState(void) override { return mState; }
void Reset(void) override;
float GetConfidence(void) override;
protected:
nsCodingStateMachine* mCodingSM;
nsProbingState mState;
SJISContextAnalysis mContextAnalyser;
SJISDistributionAnalysis mDistributionAnalyser;
char mLastChar[2];
};
#endif /* nsSJISProber_h__ */

Просмотреть файл

@ -1,43 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsUTF8Prober.h"
// Restores the initial detecting state: clears the multi-byte character
// counter and resets the coding state machine.
void nsUTF8Prober::Reset(void) {
  mState = eDetecting;
  mNumOfMBChar = 0;
  mCodingSM->Reset();
}
// Runs `aLen` bytes from `aBuf` through the UTF-8 state machine, counting
// every completed character that is at least two bytes long. Returns the
// prober state afterwards: eFoundIt if the state machine reported eItsMe or
// the confidence now exceeds SHORTCUT_THRESHOLD, otherwise the unchanged
// state.
nsProbingState nsUTF8Prober::HandleData(const char* aBuf, uint32_t aLen) {
  const char* const end = aBuf + aLen;
  for (const char* cursor = aBuf; cursor != end; ++cursor) {
    const uint32_t state = mCodingSM->NextState(*cursor);
    if (state == eItsMe) {
      mState = eFoundIt;
      break;
    }
    // Back at the start state means a whole character was just consumed.
    if (state == eStart && mCodingSM->GetCurrentCharLen() >= 2) {
      ++mNumOfMBChar;
    }
  }

  if (mState == eDetecting && GetConfidence() > SHORTCUT_THRESHOLD) {
    mState = eFoundIt;
  }
  return mState;
}
#define ONE_CHAR_PROB (float)0.50
float nsUTF8Prober::GetConfidence(void) {
float unlike = (float)0.99;
if (mNumOfMBChar < 6) {
for (uint32_t i = 0; i < mNumOfMBChar; i++) unlike *= ONE_CHAR_PROB;
return (float)1.0 - unlike;
} else
return (float)0.99;
}

Просмотреть файл

@ -1,32 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsUTF8Prober_h__
#define nsUTF8Prober_h__
#include "nsCharSetProber.h"
#include "nsCodingStateMachine.h"
class nsUTF8Prober : public nsCharSetProber {
public:
nsUTF8Prober() {
mNumOfMBChar = 0;
mCodingSM = new nsCodingStateMachine(&UTF8SMModel);
Reset();
}
virtual ~nsUTF8Prober() { delete mCodingSM; }
nsProbingState HandleData(const char* aBuf, uint32_t aLen) override;
const char* GetCharSetName() override { return "UTF-8"; }
nsProbingState GetState(void) override { return mState; }
void Reset(void) override;
float GetConfidence(void) override;
protected:
nsCodingStateMachine* mCodingSM;
nsProbingState mState;
uint32_t mNumOfMBChar;
};
#endif /* nsUTF8Prober_h__ */

Просмотреть файл

@ -1,179 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nscore.h"
#include "nsUniversalDetector.h"
#include "nsMBCSGroupProber.h"
#include "nsEscCharsetProber.h"
// Creates a detector in its initial state: no data seen, pure-ASCII input
// assumed, no charset detected and no probers allocated (they are created on
// demand by HandleData).
//
// Members are listed in declaration order. mLanguageFilter was previously left
// uninitialised; it is now given a defined value of 0.
nsUniversalDetector::nsUniversalDetector()
    : mInputState(ePureAscii),
      mDone(false),
      mInTag(false),
      mStart(true),
      mGotData(false),
      mLastChar('\0'),
      mDetectedCharset(nullptr),
      mBestGuess(-1),  // illegal value as signal
      mLanguageFilter(0),
      mMultibyteProber(nullptr),
      mEscCharSetProber(nullptr) {}
// Releases both probers. Either pointer may still be null if HandleData never
// needed that prober; deleting a null pointer is a no-op.
nsUniversalDetector::~nsUniversalDetector() {
delete mMultibyteProber;
delete mEscCharSetProber;
}
// Returns the detector to the state it had right after construction so that
// it can be reused for a new stream. Probers that were already allocated are
// kept and reset rather than freed.
void nsUniversalDetector::Reset() {
  // Stream-level bookkeeping.
  mStart = true;
  mGotData = false;
  mDone = false;
  mInTag = false;
  mLastChar = '\0';
  mInputState = ePureAscii;

  // Detection result.
  mDetectedCharset = nullptr;
  mBestGuess = -1;  // illegal value as signal

  if (mMultibyteProber != nullptr) {
    mMultibyteProber->Reset();
  }
  if (mEscCharSetProber != nullptr) {
    mEscCharSetProber->Reset();
  }
}
//---------------------------------------------------------------------
#define SHORTCUT_THRESHOLD (float)0.95
#define MINIMUM_THRESHOLD (float)0.20
// Feeds one chunk of the stream to the detector.
//
// Steps, in order:
//   1. On the first call only, look for a UTF-8 / UTF-16 byte order mark at
//      the start of the chunk; a BOM decides the charset immediately.
//   2. Scan the chunk to classify the input as pure ASCII, ASCII with an
//      escape character, or containing high bytes, creating the multibyte
//      prober on the first high byte.
//   3. Hand the whole chunk to the prober that matches the current input
//      state and record its answer if it is certain.
//
// Once mDone is set, further calls return immediately. This function only
// records the result in mDetectedCharset; it never calls Report() itself.
// Always returns NS_OK except for the out-of-memory check below.
nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen) {
if (mDone) return NS_OK;
if (aLen > 0) mGotData = true;
// If the data starts with BOM, we know it is UTF
if (mStart) {
// The BOM check is attempted once per stream, whatever the length of the
// first chunk.
mStart = false;
if (aLen >= 2) {
switch (aBuf[0]) {
case '\xEF':
if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) {
// EF BB BF UTF-8 encoded BOM
mDetectedCharset = "UTF-8";
}
break;
case '\xFE':
if ('\xFF' == aBuf[1]) {
// FE FF UTF-16, big endian BOM
mDetectedCharset = "UTF-16BE";
}
break;
case '\xFF':
if ('\xFE' == aBuf[1]) {
// FF FE UTF-16, little endian BOM
mDetectedCharset = "UTF-16LE";
}
break;
} // switch
}
if (mDetectedCharset) {
mDone = true;
return NS_OK;
}
}
uint32_t i;
for (i = 0; i < aLen; i++) {
// Other than 0xA0, if every other character is ASCII, the page is ASCII.
if (aBuf[i] & '\x80' &&
aBuf[i] != '\xA0') // Since many ASCII-only pages contain NBSP
{
// we got a non-ascii byte (high-byte)
if (mInputState != eHighbyte) {
// adjust state
mInputState = eHighbyte;
// kill mEscCharSetProber if it is active
if (mEscCharSetProber) {
delete mEscCharSetProber;
mEscCharSetProber = nullptr;
}
// start multibyte charset prober
if (!mMultibyteProber) {
mMultibyteProber = new nsMBCSGroupProber();
}
}
} else {
// ok, just pure ascii so far
if ((ePureAscii == mInputState) && (aBuf[i] == '\033')) {
// found escape character
mInputState = eEscAscii;
}
mLastChar = aBuf[i];
}
}
// Dispatch the whole chunk to the prober selected by the scan above.
nsProbingState st;
switch (mInputState) {
case eEscAscii:
// The escape-sequence prober is created lazily, on first use.
if (nullptr == mEscCharSetProber) {
mEscCharSetProber = new nsEscCharSetProber();
if (nullptr == mEscCharSetProber) return NS_ERROR_OUT_OF_MEMORY;
}
st = mEscCharSetProber->HandleData(aBuf, aLen);
if (st == eFoundIt) {
mDone = true;
mDetectedCharset = mEscCharSetProber->GetCharSetName();
}
break;
case eHighbyte:
// mMultibyteProber was created when the state switched to eHighbyte.
st = mMultibyteProber->HandleData(aBuf, aLen);
if (st == eFoundIt) {
mDone = true;
mDetectedCharset = mMultibyteProber->GetCharSetName();
return NS_OK;
}
break;
default: // pure ascii
; // do nothing here
}
return NS_OK;
}
//---------------------------------------------------------------------
// Signals the end of the stream and reports the best available answer, if
// any, through Report().
//
// - No data fed at all: nothing is reported (callers sometimes call DataEnd
//   before sending anything to the detector).
// - A charset was already detected: it is reported.
// - High bytes were seen: the multibyte prober's charset is reported only if
//   its confidence exceeds MINIMUM_THRESHOLD; staying silent is in fact a
//   negative answer.
// - Pure ASCII or escape-sequence input without a detected charset: nothing
//   is reported.
void nsUniversalDetector::DataEnd() {
  if (!mGotData) {
    return;
  }

  if (mDetectedCharset) {
    mDone = true;
    Report(mDetectedCharset);
    return;
  }

  if (mInputState != eHighbyte) {
    return;
  }

  if (mMultibyteProber->GetConfidence() > MINIMUM_THRESHOLD) {
    Report(mMultibyteProber->GetCharSetName());
  }
}

Просмотреть файл

@ -1,37 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsUniversalDetector_h__
#define nsUniversalDetector_h__
class nsCharSetProber;
typedef enum { ePureAscii = 0, eEscAscii = 1, eHighbyte = 2 } nsInputState;
// Base class of the charset detector. Callers feed the stream in chunks with
// HandleData() and finish with DataEnd(); subclasses receive the result
// through the pure virtual Report().
class nsUniversalDetector {
public:
nsUniversalDetector();
virtual ~nsUniversalDetector();
// Consumes one chunk of input; see the definition for the detection steps.
virtual nsresult HandleData(const char* aBuf, uint32_t aLen);
// Marks the end of input and triggers Report() if there is an answer.
virtual void DataEnd(void);
protected:
// Called with the detected charset name; implemented by subclasses.
virtual void Report(const char* aCharset) = 0;
// Restores the freshly constructed state, keeping allocated probers.
virtual void Reset();
nsInputState mInputState; // what kind of bytes have been seen so far
bool mDone; // a definite answer has been found
bool mInTag; // NOTE(review): only written, never read, in the code visible here
bool mStart; // true until the first HandleData call has checked for a BOM
bool mGotData; // at least one non-empty chunk was fed
char mLastChar; // last byte that was not treated as a high byte
const char* mDetectedCharset; // result, or nullptr while undecided
int32_t mBestGuess; // -1 means "no guess"
uint32_t mLanguageFilter; // NOTE(review): not referenced by the code visible here
nsCharSetProber* mMultibyteProber; // owned; created on the first high byte
nsCharSetProber* mEscCharSetProber; // owned; created on the first escape character
};
#endif

Просмотреть файл

@ -1,8 +0,0 @@
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
DIRS += ['base', 'xpcom']

Просмотреть файл

@ -1,15 +0,0 @@
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
UNIFIED_SOURCES += [
'nsUdetXPCOMWrapper.cpp',
]
FINAL_LIBRARY = 'xul'
LOCAL_INCLUDES += [
'../base',
]

Просмотреть файл

@ -1,75 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nscore.h"
#include "nsUniversalDetector.h"
#include "nsUdetXPCOMWrapper.h"
#include "nsCharSetProber.h" // for DumpStatus
#include "nsUniversalCharDetDll.h"
//---- for XPCOM
#include "nsIFactory.h"
#include "nsISupports.h"
#include "nsCOMPtr.h"
//---------------------------------------------------------------------
// Constructs the wrapper; all detection state is set up by the
// nsUniversalDetector base constructor.
nsXPCOMDetector::nsXPCOMDetector() : nsUniversalDetector() {}
//---------------------------------------------------------------------
// Nothing to release here beyond what the members and the base class
// destructor already handle.
nsXPCOMDetector::~nsXPCOMDetector() {}
//---------------------------------------------------------------------
NS_IMPL_ISUPPORTS(nsXPCOMDetector, nsICharsetDetector)
//---------------------------------------------------------------------
// Registers the observer that will be notified of detection results.
// Returns NS_ERROR_ILLEGAL_VALUE when `aObserver` is null, NS_OK otherwise.
// Calling Init more than once is asserted against.
NS_IMETHODIMP nsXPCOMDetector::Init(nsICharsetDetectionObserver* aObserver) {
  NS_ASSERTION(mObserver == nullptr, "Init twice");
  if (!aObserver) {
    return NS_ERROR_ILLEGAL_VALUE;
  }
  mObserver = aObserver;
  return NS_OK;
}
//----------------------------------------------------------
// Runs detection over a single buffer.
//
// The detector is reset first, so every call is evaluated independently of
// earlier ones. If the buffer yields a definite answer, the charset is
// reported to the observer and `*oDontFeedMe` is set to true to tell the
// caller that no more data is needed; otherwise it is set to false.
//
// Returns NS_ERROR_ILLEGAL_VALUE when `aBuf` or `oDontFeedMe` is null, the
// HandleData error code on failure, and NS_OK otherwise.
NS_IMETHODIMP nsXPCOMDetector::DoIt(const char* aBuf, uint32_t aLen,
                                    bool* oDontFeedMe) {
  NS_ASSERTION(mObserver != nullptr, "have not init yet");
  if ((nullptr == aBuf) || (nullptr == oDontFeedMe)) {
    return NS_ERROR_ILLEGAL_VALUE;
  }

  // Give the out-param a defined value up front so callers that inspect it
  // before checking the return code never read an indeterminate bool.
  *oDontFeedMe = false;

  this->Reset();
  nsresult rv = this->HandleData(aBuf, aLen);
  if (NS_FAILED(rv)) {
    return rv;
  }

  if (mDone) {
    if (mDetectedCharset) {
      Report(mDetectedCharset);
    }
    // Previously this was unconditionally overwritten with false right
    // after the block, so callers were never told to stop feeding.
    *oDontFeedMe = true;
  }
  return NS_OK;
}
//----------------------------------------------------------
// Signals the end of input: forwards to DataEnd(), which reports the best
// answer (if any) through Report(). Always returns NS_OK.
NS_IMETHODIMP nsXPCOMDetector::Done() {
NS_ASSERTION(mObserver != nullptr, "have not init yet");
#ifdef DEBUG_chardet
// NOTE(review): mCharSetProbers and NUM_OF_CHARSET_PROBERS are not declared in
// the nsUniversalDetector header visible in this change (it only has
// mMultibyteProber and mEscCharSetProber), so this debug-only block looks
// stale - confirm that it still builds with DEBUG_chardet defined.
for (int32_t i = 0; i < NUM_OF_CHARSET_PROBERS; i++) {
// If no data was received the array might stay filled with nulls
// the way it was initialized in the constructor.
if (mCharSetProbers[i]) mCharSetProbers[i]->DumpStatus();
}
#endif
this->DataEnd();
return NS_OK;
}
//----------------------------------------------------------
// Delivers the detected charset to the registered observer, always tagged as
// eBestAnswer. The observer must have been set with Init() beforehand; this is
// only asserted, not checked.
void nsXPCOMDetector::Report(const char* aCharset) {
NS_ASSERTION(mObserver != nullptr, "have not init yet");
#ifdef DEBUG_chardet
printf("Universal Charset Detector report charset %s . \r\n", aCharset);
#endif
mObserver->Notify(aCharset, eBestAnswer);
}

Просмотреть файл

@ -1,40 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef _nsUdetXPCOMWrapper_h__
#define _nsUdetXPCOMWrapper_h__
#include "nsISupports.h"
#include "nsICharsetDetector.h"
#include "nsIStringCharsetDetector.h"
#include "nsICharsetDetectionObserver.h"
#include "nsCOMPtr.h"
#include "nsIFactory.h"
#include "nsUniversalDetector.h"
//=====================================================================
// Exposes nsUniversalDetector through the nsICharsetDetector interface:
// Init() registers an observer, DoIt() feeds a buffer, Done() ends the input,
// and results are forwarded to the observer by Report().
class nsXPCOMDetector : public nsUniversalDetector, public nsICharsetDetector {
NS_DECL_ISUPPORTS
public:
nsXPCOMDetector();
NS_IMETHOD Init(nsICharsetDetectionObserver* aObserver) override;
NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen, bool* oDontFeedMe) override;
NS_IMETHOD Done() override;
protected:
// Protected destructor: instances are not meant to be deleted directly by
// outside code.
virtual ~nsXPCOMDetector();
// Forwards the detected charset to mObserver.
virtual void Report(const char* aCharset) override;
private:
// Receiver of detection results; set once by Init().
nsCOMPtr<nsICharsetDetectionObserver> mObserver;
};
//=====================================================================
// Concrete detector type; it adds nothing to nsXPCOMDetector beyond a public
// constructor and being final.
class nsJAPSMDetector final : public nsXPCOMDetector {
public:
nsJAPSMDetector() : nsXPCOMDetector() {}
};
#endif //_nsUdetXPCOMWrapper_h__

Просмотреть файл

@ -1,11 +0,0 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsCharDetDll_h__
#define nsCharDetDll_h__
#include "prtypes.h"
#endif /* nsCharDetDll_h__ */

Просмотреть файл

@ -252,6 +252,14 @@ class Encoding final {
return encoding_is_ascii_compatible(this);
}
/**
 * Tells whether this encoding is one of the legacy Japanese encodings:
 * Shift_JIS, EUC-JP or ISO-2022-JP.
 */
inline bool IsJapaneseLegacy() const {
  if (this == SHIFT_JIS_ENCODING) {
    return true;
  }
  if (this == EUC_JP_ENCODING) {
    return true;
  }
  return this == ISO_2022_JP_ENCODING;
}
/**
* Returns the _output encoding_ of this encoding. This is UTF-8 for
* UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise.

124
intl/JapaneseDetector.h Normal file
Просмотреть файл

@ -0,0 +1,124 @@
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Mostly copied and pasted from
// third_party/rust/shift_or_euc/src/lib.rs , so
// "top-level directory of this distribution" above refers to
// third_party/rust/shift_or_euc/
#ifndef mozilla_JapaneseDetector_h
#define mozilla_JapaneseDetector_h
#include "mozilla/Encoding.h"
namespace mozilla {
class JapaneseDetector;
}; // namespace mozilla
#define SHIFT_OR_EUC_DETECTOR mozilla::JapaneseDetector
#include "shift_or_euc.h"
namespace mozilla {
/**
* A Japanese legacy encoding detector for detecting between Shift_JIS,
* EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the
* encoding is one of those.
*
* # Principle of Operation
*
* The detector is based on two observations:
*
* 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
* EUC-JP, so encountering such an escape sequence (before non-ASCII has been
* encountered) can be taken as indication of ISO-2022-JP.
* 2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
* decoded as EUC-JP, or vice versa, the result is either an error or
* half-width katakana, and it's very uncommon for Japanese HTML to have
* half-width katakana character before a normal kana or common kanji
* character. Therefore, if decoding as Shift_JIS results in error or
* half-width katakana, the detector decides that the content is EUC-JP, and
* vice versa.
*
* # Failure Modes
*
* The detector gives the wrong answer if the text has a half-width katakana
* character before normal kana or common kanji. Some uncommon kanji are
* undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
*
* The half-width katakana issue is mainly relevant for old 8-bit JIS X
* 0201-only text files that would decode correctly as Shift_JIS but that the
* detector detects as EUC-JP.
*
* The undecidable kanji issue does not realistically show up when a full
* document is fed to the detector, because, realistically, in a full
* document, there is at least one kana or common kanji. It can occur,
* though, if the detector is only run on a prefix of a document and the
* prefix only contains the title of the document. It is possible for
* document title to consist entirely of undecidable kanji. (Indeed,
* Japanese Wikipedia has articles with such titles.) If the detector is
* undecided, a fallback to Shift_JIS should be used.
*/
class JapaneseDetector final {
 public:
  ~JapaneseDetector() {}

  /**
   * Hands the storage back to the library that allocated it in `Create()`
   * instead of using the default deallocation function.
   */
  static void operator delete(void* aDetector) {
    shift_or_euc_detector_free(static_cast<JapaneseDetector*>(aDetector));
  }

  /**
   * Creates a detector.
   *
   * With `aAllow2022` set to `true` the detector may guess Shift_JIS,
   * EUC-JP or ISO-2022-JP, or stay undecided. With `aAllow2022` set to
   * `false`, ISO-2022-JP is never guessed.
   */
  static inline UniquePtr<JapaneseDetector> Create(bool aAllow2022) {
    return UniquePtr<JapaneseDetector>(shift_or_euc_detector_new(aAllow2022));
  }

  /**
   * Passes the bytes in `aBuffer` (which may be empty) to the detector.
   * `aLast` set to `true` means the stream ends right after `aBuffer`;
   * `false` means more input is expected.
   *
   * When the detector is run on just a prefix of a document, `aLast` must
   * _not_ be `true` at the end of the prefix if the stream as a whole has
   * more content.
   *
   * Return value:
   *  - `SHIFT_JIS_ENCODING` when the guess is Shift_JIS,
   *  - `EUC_JP_ENCODING` when the guess is EUC-JP,
   *  - `ISO_2022_JP_ENCODING` when the guess is ISO-2022-JP (only possible
   *    if the detector was created with `aAllow2022` set to `true`),
   *  - `nullptr` while the detector is undecided. If it is still `nullptr`
   *    after a call with `aLast` set to `true`, Shift_JIS is the best
   *    fallback for Web purposes.
   *
   * The method must not be called again once it has returned a
   * non-`nullptr` value or has been called with `aLast` set to `true`;
   * doing so asserts.
   */
  inline const mozilla::Encoding* Feed(Span<const uint8_t> aBuffer,
                                       bool aLast) {
    const uint8_t* bytes = aBuffer.Elements();
    const size_t length = aBuffer.Length();
    return shift_or_euc_detector_feed(this, bytes, length, aLast);
  }

 private:
  // Instances only come from `Create()`; construction and copying are
  // disabled.
  JapaneseDetector() = delete;
  JapaneseDetector(const JapaneseDetector&) = delete;
  JapaneseDetector& operator=(const JapaneseDetector&) = delete;
};
}; // namespace mozilla
#endif // mozilla_JapaneseDetector_h

Просмотреть файл

@ -26,11 +26,13 @@ DIRS += [
EXPORTS.mozilla += [
'Encoding.h',
'JapaneseDetector.h',
]
EXPORTS += [
'../third_party/rust/encoding_c/include/encoding_rs.h',
'../third_party/rust/encoding_c/include/encoding_rs_statics.h',
'../third_party/rust/shift_or_euc_c/include/shift_or_euc.h',
]
with Files("**"):

Просмотреть файл

@ -3373,6 +3373,15 @@ VARCACHE_PREF(
RelaxedAtomicBool, false
)
// Whether ISO-2022-JP is a permitted content-based encoding detection
// outcome.
VARCACHE_PREF(
Live,
"intl.charset.detector.iso2022jp.allowed",
intl_charset_detector_iso2022jp_allowed,
bool, true
)
//---------------------------------------------------------------------------
// Prefs starting with "layers."
//---------------------------------------------------------------------------

Просмотреть файл

@ -95,8 +95,6 @@ FINAL_LIBRARY = 'xul'
LOCAL_INCLUDES += [
'/dom/base',
'/extensions/universalchardet/src/base',
'/extensions/universalchardet/src/xpcom',
'/intl/chardet',
]

Просмотреть файл

@ -31,7 +31,6 @@
#include "nsIThreadRetargetableRequest.h"
#include "nsPrintfCString.h"
#include "nsNetUtil.h"
#include "nsUdetXPCOMWrapper.h"
#include "nsXULAppAPI.h"
#include "mozilla/SchedulerGroup.h"
#include "nsJSEnvironment.h"
@ -156,7 +155,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
mBomState(eBomState::BOM_SNIFFING_NOT_STARTED),
mCharsetSource(kCharsetUninitialized),
mEncoding(WINDOWS_1252_ENCODING),
mFeedChardetIfEncoding(nullptr),
mFeedChardet(true),
mReparseForbidden(false),
mLastBuffer(nullptr), // Will be filled when starting
mExecutor(aExecutor),
@ -181,6 +180,8 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
mEventTarget(nsHtml5Module::GetStreamParserThread()->SerialEventTarget()),
mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)),
mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)),
mJapaneseDetector(mozilla::JapaneseDetector::Create(
StaticPrefs::intl_charset_detector_iso2022jp_allowed())),
mInitialEncodingWasFromParentFrame(false),
mHasHadErrors(false),
mDecodingLocalFileAsUTF8(false),
@ -210,16 +211,11 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
nsAutoCString detectorName;
Preferences::GetLocalizedCString("intl.charset.detector", detectorName);
if (!detectorName.IsEmpty()) {
// We recognize one of the three magic strings for the following languages.
// We recognize one of the two magic strings for Russian and Ukrainian.
if (detectorName.EqualsLiteral("ruprob")) {
mChardet = new nsRUProbDetector();
mFeedChardetIfEncoding = WINDOWS_1251_ENCODING;
} else if (detectorName.EqualsLiteral("ukprob")) {
mChardet = new nsUKProbDetector();
mFeedChardetIfEncoding = WINDOWS_1251_ENCODING;
} else if (detectorName.EqualsLiteral("ja_parallel_state_machine")) {
mChardet = new nsJAPSMDetector();
mFeedChardetIfEncoding = SHIFT_JIS_ENCODING;
}
if (mChardet) {
(void)mChardet->Init(this);
@ -263,7 +259,7 @@ NS_IMETHODIMP
nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
NS_ASSERTION(IsParserThread(), "Wrong thread!");
if (aConf == eBestAnswer || aConf == eSureAnswer) {
mFeedChardetIfEncoding = nullptr; // just in case
mFeedChardet = false; // just in case
auto encoding =
Encoding::ForLabelNoReplacement(nsDependentCString(aCharset));
if (!encoding) {
@ -271,8 +267,8 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
}
if (HasDecoder()) {
if (mEncoding == encoding) {
NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
"Why are we running chardet at all?");
MOZ_ASSERT(mCharsetSource < kCharsetFromAutoDetection,
"Why are we running chardet at all?");
mCharsetSource = kCharsetFromAutoDetection;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
} else {
@ -294,6 +290,62 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
return NS_OK;
}
// Feeds `aBuffer` to the Japanese legacy encoding detector and acts on its
// guess, if it makes one. `aLast` is forwarded to the detector unchanged.
//
// Nothing happens while the detector is undecided. Once it guesses, detector
// feeding is switched off and then:
//  - when a local file is being decoded as UTF-8, every guess except
//    ISO-2022-JP is ignored;
//  - a guess equal to the current encoding only upgrades the charset source;
//  - a different guess requests a charset switch if a decoder already exists,
//    or otherwise becomes the current encoding.
void nsHtml5StreamParser::FeedJapaneseDetector(Span<const uint8_t> aBuffer,
                                               bool aLast) {
  const Encoding* guess = mJapaneseDetector->Feed(aBuffer, aLast);
  if (guess == nullptr) {
    return;
  }
  mFeedChardet = false;
  if (mDecodingLocalFileAsUTF8 && guess != ISO_2022_JP_ENCODING) {
    return;
  }

  // A forced charset keeps its "forced" nature in the resulting source.
  const bool wasForced = mCharsetSource == kCharsetFromParentForced ||
                         mCharsetSource == kCharsetFromUserForced;
  const int32_t source = wasForced ? kCharsetFromUserForcedAutoDetection
                                   : kCharsetFromAutoDetection;

  if (guess == mEncoding) {
    MOZ_ASSERT(mCharsetSource < source, "Why are we running chardet at all?");
    mCharsetSource = source;
    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
    return;
  }

  if (HasDecoder()) {
    // We've already committed to a decoder. Request a reload from the
    // docshell.
    mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(guess), source, 0);
    FlushTreeOpsAndDisarmTimer();
    Interrupt();
    return;
  }

  // Got a confident answer from the sniffing buffer. That code will
  // take care of setting up the decoder.
  mEncoding = WrapNotNull(guess);
  mCharsetSource = source;
  mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
}
// Routes `aBuffer` to whichever detector applies to the current encoding:
//  - Japanese legacy encodings go to the Japanese detector;
//  - windows-1251 goes to the configured chardet instance, provided one
//    exists and a local file is not being decoded as UTF-8;
//  - anything else turns detector feeding off.
// `aLast` marks the end of the stream.
void nsHtml5StreamParser::FeedDetector(Span<const uint8_t> aBuffer,
                                       bool aLast) {
  if (mEncoding->IsJapaneseLegacy()) {
    FeedJapaneseDetector(aBuffer, aLast);
    return;
  }

  if (!(mEncoding == WINDOWS_1251_ENCODING && mChardet &&
        !mDecodingLocalFileAsUTF8)) {
    mFeedChardet = false;
    return;
  }

  if (!aBuffer.IsEmpty()) {
    bool stopFeeding = false;
    mozilla::Unused << mChardet->DoIt(
        reinterpret_cast<const char*>(aBuffer.Elements()), aBuffer.Length(),
        &stopFeeding);
    if (stopFeeding) {
      mFeedChardet = false;
    }
  }
  if (aLast) {
    mozilla::Unused << mChardet->Done();
  }
}
void nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) {
if (recordreplay::IsRecordingOrReplaying()) {
nsAutoCString spec;
@ -335,6 +387,11 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
MOZ_ASSERT(mEncoding != UTF_8_ENCODING);
mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
} else {
if (mCharsetSource >= kCharsetFromAutoDetection &&
!(mCharsetSource == kCharsetFromUserForced ||
mCharsetSource == kCharsetFromParentForced)) {
mFeedChardet = false;
}
mDecodingLocalFileAsUTF8 = false;
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
}
@ -354,7 +411,7 @@ nsresult nsHtml5StreamParser::SetupDecodingFromBom(
mDecodingLocalFileAsUTF8 = false;
mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling();
mCharsetSource = kCharsetFromByteOrderMark;
mFeedChardetIfEncoding = nullptr;
mFeedChardet = false;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
mSniffingBuffer = nullptr;
mMetaScanner = nullptr;
@ -412,7 +469,7 @@ void nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(
}
mCharsetSource = kCharsetFromIrreversibleAutoDetection;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
mFeedChardetIfEncoding = nullptr;
mFeedChardet = false;
mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0);
}
@ -479,12 +536,38 @@ static void HandleProcessingInstruction(void* aUserData,
XML_StopParser(ud->mExpat, false);
}
// Runs the encoding detector over the data collected during sniffing: first
// the buffered bytes, then the current segment, then (at end of stream) an
// explicit end-of-input signal. Each later step is skipped once detector
// feeding has been switched off.
void nsHtml5StreamParser::FinalizeSniffingWithDetector(
    Span<const uint8_t> aFromSegment, uint32_t aCountToSniffingLimit,
    bool aEof) {
  if (mSniffingBuffer) {
    FeedDetector(MakeSpan(mSniffingBuffer.get(), mSniffingLength), false);
  }

  if (mFeedChardet && !aFromSegment.IsEmpty()) {
    // Avoid buffer boundary-dependent behavior when
    // reparsing is forbidden. If reparse is forbidden,
    // act as if we only saw the first 1024 bytes.
    // When reparsing isn't forbidden, buffer boundaries
    // can have an effect on whether the page is loaded
    // once or twice. :-(
    Span<const uint8_t> toFeed = aFromSegment;
    if (mReparseForbidden) {
      toFeed = aFromSegment.To(aCountToSniffingLimit);
    }
    FeedDetector(toFeed, false);
  }

  if (!mFeedChardet || !aEof) {
    return;
  }
  // Don't signal EOF if reparse is forbidden and we didn't pass all input
  // to the detector above.
  if (mReparseForbidden && aCountToSniffingLimit != aFromSegment.Length()) {
    return;
  }
  mFeedChardet = false;
  FeedDetector(Span<const uint8_t>(), true);
}
nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
uint32_t aCountToSniffingLimit,
bool aEof) {
NS_ASSERTION(IsParserThread(), "Wrong thread!");
NS_ASSERTION(mCharsetSource < kCharsetFromParentForced,
"Should not finalize sniffing when using forced charset.");
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
MOZ_ASSERT(mCharsetSource < kCharsetFromUserForcedAutoDetection,
"Should not finalize sniffing with strong decision already made.");
if (mMode == VIEW_SOURCE_XML) {
static const XML_Memory_Handling_Suite memsuite = {
(void* (*)(size_t))moz_xmalloc, (void* (*)(void*, size_t))moz_xrealloc,
@ -547,50 +630,15 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
}
// meta scan failed.
if (mCharsetSource >= kCharsetFromHintPrevDoc) {
mFeedChardetIfEncoding = nullptr;
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
if (mCharsetSource < kCharsetFromMetaPrescan) {
// Check for BOMless UTF-16 with Basic
// Latin content for compat with IE. See bug 631751.
SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit));
}
// Check for BOMless UTF-16 with Basic
// Latin content for compat with IE. See bug 631751.
SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit));
// the charset may have been set now
// maybe try chardet now;
if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
bool dontFeed;
nsresult rv;
if (mSniffingBuffer) {
rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength,
&dontFeed);
if (dontFeed) {
mFeedChardetIfEncoding = nullptr;
}
NS_ENSURE_SUCCESS(rv, rv);
}
if ((mFeedChardetIfEncoding == mEncoding) && !aFromSegment.IsEmpty()) {
rv = mChardet->DoIt(
(const char*)aFromSegment.Elements(),
// Avoid buffer boundary-dependent behavior when
// reparsing is forbidden. If reparse is forbidden,
// act as if we only saw the first 1024 bytes.
// When reparsing isn't forbidden, buffer boundaries
// can have an effect on whether the page is loaded
// once or twice. :-(
mReparseForbidden ? aCountToSniffingLimit : aFromSegment.Length(),
&dontFeed);
if (dontFeed) {
mFeedChardetIfEncoding = nullptr;
}
NS_ENSURE_SUCCESS(rv, rv);
}
if ((mFeedChardetIfEncoding == mEncoding) && (aEof || mReparseForbidden)) {
// mReparseForbidden is checked so that we get to use the sniffing
// buffer with the best guess so far if we aren't allowed to guess
// better later.
mFeedChardetIfEncoding = nullptr;
rv = mChardet->Done();
NS_ENSURE_SUCCESS(rv, rv);
}
if (mFeedChardet) {
FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, aEof);
// fall thru; callback may have changed charset
}
if (mCharsetSource == kCharsetUninitialized) {
@ -600,7 +648,7 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
} else if (mMode == LOAD_AS_DATA && mCharsetSource == kCharsetFromFallback) {
NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
NS_ASSERTION(!mFeedChardetIfEncoding, "Should not feed chardet for XHR");
NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
NS_ASSERTION(mEncoding == UTF_8_ENCODING, "XHR should default to UTF-8");
// Now mark charset source as non-weak to signal that we have a decision
mCharsetSource = kCharsetFromDocTypeDefault;
@ -687,7 +735,6 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
// earlier call to SetDocumentCharset(), since we didn't find a BOM and
// overwrite mEncoding. (Note that if the user has overridden the charset,
// we don't come here but check <meta> for XSS-dangerous charsets first.)
mFeedChardetIfEncoding = nullptr;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
}
@ -719,12 +766,16 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
(encoding->IsAsciiCompatible() ||
encoding == ISO_2022_JP_ENCODING)) {
// Honor override
if (mEncoding->IsJapaneseLegacy()) {
mFeedChardet = true;
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
false);
}
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
aFromSegment);
}
mEncoding = WrapNotNull(encoding);
mCharsetSource = kCharsetFromMetaPrescan;
mFeedChardetIfEncoding = nullptr;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
aFromSegment);
@ -733,6 +784,10 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
if (mCharsetSource == kCharsetFromParentForced ||
mCharsetSource == kCharsetFromUserForced) {
// meta not found, honor override
if (mEncoding->IsJapaneseLegacy()) {
mFeedChardet = true;
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit, false);
}
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
}
return FinalizeSniffing(aFromSegment, countToSniffingLimit, false);
@ -761,7 +816,6 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
}
mEncoding = WrapNotNull(encoding);
mCharsetSource = kCharsetFromMetaPrescan;
mFeedChardetIfEncoding = nullptr;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
}
@ -841,6 +895,12 @@ void nsHtml5StreamParser::ReDecodeLocalFile() {
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
mHasHadErrors = false;
// We need the detector to start with fresh state.
// Turn off ISO-2022-JP detection, because if this doc was
// ISO-2022-JP, it would have already been detected.
mJapaneseDetector = mozilla::JapaneseDetector::Create(false);
mFeedChardet = true;
// Throw away previous decoded data
mLastBuffer = mFirstBuffer;
mLastBuffer->next = nullptr;
@ -856,7 +916,7 @@ void nsHtml5StreamParser::ReDecodeLocalFile() {
void nsHtml5StreamParser::CommitLocalFileToUTF8() {
MOZ_ASSERT(mDecodingLocalFileAsUTF8);
mDecodingLocalFileAsUTF8 = false;
mFeedChardetIfEncoding = nullptr;
mFeedChardet = false;
mEncoding = UTF_8_ENCODING;
mCharsetSource = kCharsetFromFileURLGuess;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
@ -1000,7 +1060,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
// This is the old Gecko behavior but the HTML5 spec disagrees.
// Don't reparse on POST.
mReparseForbidden = true;
mFeedChardetIfEncoding = nullptr; // can't restart anyway
mFeedChardet = false; // can't restart anyway
}
}
@ -1031,8 +1091,10 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
mInitialEncodingWasFromParentFrame = true;
}
if (mCharsetSource >= kCharsetFromAutoDetection) {
mFeedChardetIfEncoding = nullptr;
if (mCharsetSource >= kCharsetFromAutoDetection &&
!(mCharsetSource == kCharsetFromParentForced ||
mCharsetSource == kCharsetFromUserForced)) {
mFeedChardet = false;
}
if (mCharsetSource < kCharsetFromUtf8OnlyMime) {
@ -1041,11 +1103,11 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
return NS_OK;
}
// We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
// a browsing context. In the latter case, there's no need to remove the
// BOM manually here, because the UTF-8 decoder removes it.
// We are loading JSON/WebVTT/etc. into a browsing context.
// There's no need to remove the BOM manually here, because
// the UTF-8 decoder removes it.
mReparseForbidden = true;
mFeedChardetIfEncoding = nullptr;
mFeedChardet = false;
// Instantiate the converter here to avoid BOM sniffing.
mDecodingLocalFileAsUTF8 = false;
@ -1085,8 +1147,9 @@ void nsHtml5StreamParser::DoStopRequest() {
return;
}
}
if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
mChardet->Done();
if (mFeedChardet) {
mFeedChardet = false;
FeedDetector(Span<uint8_t>(), true);
}
MOZ_ASSERT(mUnicodeDecoder,
@ -1246,13 +1309,8 @@ void nsHtml5StreamParser::DoDataAvailable(Span<const uint8_t> aBuffer) {
nsresult rv;
if (HasDecoder()) {
if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
bool dontFeed;
mChardet->DoIt((const char*)aBuffer.Elements(), aBuffer.Length(),
&dontFeed);
if (dontFeed) {
mFeedChardetIfEncoding = nullptr;
}
if (mFeedChardet) {
FeedDetector(aBuffer, false);
}
rv = WriteStreamBytes(aBuffer);
} else {
@ -1411,7 +1469,7 @@ const Encoding* nsHtml5StreamParser::PreferredForInternalEncodingDecl(
}
}
mCharsetSource = kCharsetFromMetaTag; // become confident
mFeedChardetIfEncoding = nullptr; // don't feed chardet when confident
mFeedChardet = false; // don't feed chardet when confident
return nullptr;
}
@ -1450,7 +1508,7 @@ bool nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding) {
// Avoid having the chardet ask for another restart after this restart
// request.
mFeedChardetIfEncoding = nullptr;
mFeedChardet = false;
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding), kCharsetFromMetaTag,
mTokenizer->getLineNumber());
FlushTreeOpsAndDisarmTimer();

Просмотреть файл

@ -11,6 +11,7 @@
#include "nsICharsetDetectionObserver.h"
#include "nsHtml5MetaScanner.h"
#include "mozilla/Encoding.h"
#include "mozilla/JapaneseDetector.h"
#include "nsHtml5TreeOpExecutor.h"
#include "nsHtml5OwningUTF16Buffer.h"
#include "nsIInputStream.h"
@ -148,6 +149,16 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
// Not from an external interface
/**
* Pass a buffer to the JapaneseDetector.
*/
void FeedJapaneseDetector(mozilla::Span<const uint8_t> aBuffer, bool aLast);
/**
* Pass a buffer to the Japanese or Cyrillic detector as appropriate.
*/
void FeedDetector(mozilla::Span<const uint8_t> aBuffer, bool aLast);
/**
* Call this method once you've created a parser, and want to instruct it
* about what charset to load
@ -282,6 +293,12 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
*/
void SniffBOMlessUTF16BasicLatin(mozilla::Span<const uint8_t> aFromSegment);
/**
* Write the start of the stream to detector.
*/
void FinalizeSniffingWithDetector(mozilla::Span<const uint8_t> aFromSegment,
uint32_t aCountToSniffingLimit, bool aEof);
/**
* <meta charset> scan failed. Try chardet if applicable. After this, the
* parser will have some encoding even if a last resort fallback.
@ -411,9 +428,9 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
NotNull<const Encoding*> mEncoding;
/**
* The character encoding that is the base expectation for detection.
* Whether the Cyrillic or Japanese detector should still be fed.
*/
const Encoding* mFeedChardetIfEncoding;
bool mFeedChardet;
/**
* Whether reparse is forbidden
@ -529,10 +546,15 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
nsCOMPtr<nsIRunnable> mLoadFlusher;
/**
* The chardet instance if chardet is enabled.
* The Cyrillic detector if enabled.
*/
nsCOMPtr<nsICharsetDetector> mChardet;
/**
* The Japanese detector.
*/
mozilla::UniquePtr<mozilla::JapaneseDetector> mJapaneseDetector;
/**
* Whether the initial charset source was kCharsetFromParentFrame
*/

Просмотреть файл

@ -14,14 +14,14 @@
#define kCharsetFromCache 5
#define kCharsetFromParentFrame 6
#define kCharsetFromAutoDetection 7
#define kCharsetFromHintPrevDoc 8
#define kCharsetFromMetaPrescan 9 // this one and smaller: HTML5 Tentative
#define kCharsetFromMetaTag 10 // this one and greater: HTML5 Confident
#define kCharsetFromIrreversibleAutoDetection 11
#define kCharsetFromChannel 12
#define kCharsetFromOtherComponent 13
#define kCharsetFromParentForced 14 // propagates to child frames
#define kCharsetFromUserForced 15 // propagates to child frames
#define kCharsetFromMetaPrescan 8 // this one and smaller: HTML5 Tentative
#define kCharsetFromMetaTag 9 // this one and greater: HTML5 Confident
#define kCharsetFromIrreversibleAutoDetection 10
#define kCharsetFromChannel 11
#define kCharsetFromOtherComponent 12
#define kCharsetFromParentForced 13 // propagates to child frames
#define kCharsetFromUserForced 14 // propagates to child frames
#define kCharsetFromUserForcedAutoDetection 15
#define kCharsetFromByteOrderMark 16
#define kCharsetFromUtf8OnlyMime 17 // For JSON, WebVTT and such
#define kCharsetFromBuiltIn 18 // resource: URLs

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

4
third_party/rust/encoding_rs/Cargo.toml поставляемый
Просмотреть файл

@ -12,7 +12,7 @@
[package]
name = "encoding_rs"
version = "0.8.16"
version = "0.8.17"
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
description = "A Gecko-oriented implementation of the Encoding Standard"
homepage = "https://docs.rs/encoding_rs/"
@ -35,7 +35,7 @@ optional = true
version = "1.0"
optional = true
[dev-dependencies.bincode]
version = "0.8"
version = "1.0"
[dev-dependencies.serde_derive]
version = "1.0"

4
third_party/rust/encoding_rs/README.md поставляемый
Просмотреть файл

@ -404,6 +404,10 @@ To regenerate the generated code:
## Release Notes
### 0.8.17
* Update `bincode` (dev dependency) version requirement to 1.0.
### 0.8.16
* Switch from the `simd` crate to `packed_simd`.

4
third_party/rust/encoding_rs/src/lib.rs поставляемый
Просмотреть файл

@ -11,7 +11,7 @@
feature = "cargo-clippy",
allow(doc_markdown, inline_always, new_ret_no_self)
)]
#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.16")]
#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.17")]
//! encoding_rs is a Gecko-oriented Free Software / Open Source implementation
//! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust.
@ -5635,7 +5635,7 @@ mod tests {
let deserialized: Demo = serde_json::from_str(&serialized).unwrap();
assert_eq!(deserialized, demo);
let bincoded = bincode::serialize(&demo, bincode::Infinite).unwrap();
let bincoded = bincode::serialize(&demo).unwrap();
let debincoded: Demo = bincode::deserialize(&bincoded[..]).unwrap();
assert_eq!(debincoded, demo);
}

1
third_party/rust/shift_or_euc/.cargo-checksum.json поставляемый Normal file
Просмотреть файл

@ -0,0 +1 @@
{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"f9f41b76ecbe257a312ab09ed1208189b8dc9952d12d17a216fe2846d1d471c8","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"b7148745a7ef59788e76fbe638d4b41c54dcaa1313a809f4630a020645f892a8","examples/detect.rs":"eb7239ccc802290ef24331db600ca1226198801dd86df86876b4b738ef4b8470","src/lib.rs":"f2a83db125d553af5c6fabae0487ef211aad62f2d93c4418dc510cbd425d472a"},"package":"f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"}

38
third_party/rust/shift_or_euc/CONTRIBUTING.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,38 @@
If you send a pull request / patch, please observe the following.
## Licensing
Since this crate is dual-licensed,
[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
is considered to apply in the sense of Contributions being automatically
under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
That is, by the act of offering a Contribution, you place your Contribution
under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
file. Please do not contribute if you aren't willing or allowed to license your
contributions in this manner.
You are encouraged to dedicate test code that you contribute to the Public
Domain using the CC0 dedication. If you contribute test code that is not
dedicated to the Public Domain, please be sure not to put it in a part of
source code that the comments designate as being dedicated to the Public
Domain.
## Copyright Notices
If you require the addition of your copyright notice, it's up to you to edit in
your notice as part of your Contribution. Not adding a copyright notice is
taken as a waiver of copyright notice.
## Compatibility with Stable Rust
Please ensure that your Contribution compiles with the latest stable-channel
rustc.
## rustfmt
The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
use that version or avoid using `rustfmt` (so as not to reformat all the code).
## Unit tests
Please ensure that `cargo test` succeeds.

9
third_party/rust/shift_or_euc/COPYRIGHT поставляемый Normal file
Просмотреть файл

@ -0,0 +1,9 @@
shift_or_euc is copyright 2018 Mozilla Foundation.
Licensed under the Apache License, Version 2.0
<LICENSE-APACHE or
https://www.apache.org/licenses/LICENSE-2.0> or the MIT
license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
at your option. All files in the project carrying such
notice may not be copied, modified, or distributed except
according to those terms.

30
third_party/rust/shift_or_euc/Cargo.toml поставляемый Normal file
Просмотреть файл

@ -0,0 +1,30 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "shift_or_euc"
version = "0.1.0"
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
description = "Detects among the Japanese legacy encodings"
homepage = "https://docs.rs/shift_or_euc/"
documentation = "https://docs.rs/shift_or_euc/"
readme = "README.md"
keywords = ["encoding", "web", "charset"]
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
license = "MIT/Apache-2.0"
repository = "https://github.com/hsivonen/shift_or_euc"
[dependencies.encoding_rs]
version = "0.8.17"
[dependencies.memchr]
version = "2.2.0"

202
third_party/rust/shift_or_euc/LICENSE-APACHE поставляемый Normal file
Просмотреть файл

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

25
third_party/rust/shift_or_euc/LICENSE-MIT поставляемый Normal file
Просмотреть файл

@ -0,0 +1,25 @@
Copyright (c) 2018 Mozilla Foundation
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

73
third_party/rust/shift_or_euc/README.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,73 @@
# shift_or_euc
[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT)
A Japanese legacy encoding detector for detecting between Shift_JIS, EUC-JP,
and, optionally, ISO-2022-JP _given_ the assumption that the encoding is one
of those.
This detector is generally more accurate (but see below about the failure
mode on half-width katakana) and decides much sooner than machine
learning-based detectors. To decide EUC-JP, machine learning-based detectors
try to gain confidence that the input looks like EUC-JP. To decide EUC-JP,
this detector instead looks for two simple rule-based signs of the input not
being Shift_JIS.
As a consequence of not containing machine learning tables, the binary size
footprint that this crate adds on top of
[`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny.
## Documentation
[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc)
## Licensing
See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
## Sample Program Usage
1. [Install Rust](https://rustup.rs/)
2. `git clone https://github.com/hsivonen/shift_or_euc`
3. `cd shift_or_euc`
4. `cargo run --example detect PATH_TO_FILE`
The program prints one of:
* Shift_JIS
* EUC-JP
* ISO-2022-JP
* Undecided
## Principle of Operation
The detector is based on two observations:
1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
EUC-JP, so encountering such an escape sequence (before non-ASCII has been
encountered) can be taken as indication of ISO-2022-JP.
2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
decoded as EUC-JP, or vice versa, the result is either an error or half-width
katakana, and it's very uncommon for Japanese HTML to have a half-width katakana
character before a normal kana or common kanji character. Therefore, if
decoding as Shift_JIS results in error or half-width katakana, the detector
decides that the content is EUC-JP, and vice versa.
## Failure Modes
The detector gives the wrong answer if the text has a half-width katakana
character before normal kana or common kanji. Some uncommon kanji are
undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
The half-width katakana issue is mainly relevant for old 8-bit JIS X 0201-only
text files that would decode correctly as Shift_JIS but that the detector
detects as EUC-JP.
The undecidable kanji issue does not realistically show up when a full
document is fed to the detector, because, realistically, in a full document,
there is at least one kana or common kanji. It can occur, though, if the
detector is only run on a prefix of a document and the prefix only contains
the title of the document. It is possible for a document title to consist
entirely of undecidable kanji. (Indeed, Japanese Wikipedia has articles with
such titles.) If the detector is undecided, falling back to Shift_JIS is
typically the better guess for Web content.

56
third_party/rust/shift_or_euc/examples/detect.rs поставляемый Normal file
Просмотреть файл

@ -0,0 +1,56 @@
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::fs::File;
use std::io::Read;
use shift_or_euc::Detector;
/// Example driver: runs the detector over the file named by the single
/// command-line argument and prints the detected encoding name, or
/// `Undecided` if the detector could not decide by end of file.
///
/// Exit codes on failure: `-1` program name missing, `-2` no path argument,
/// `-3` too many arguments, `-4` the file could not be opened, `-5` a read
/// failed.
fn main() {
    let mut args = std::env::args_os();
    if args.next().is_none() {
        eprintln!("Error: Program name missing from arguments.");
        std::process::exit(-1);
    }
    let path = match args.next() {
        Some(path) => path,
        None => {
            eprintln!("Error: One path argument needed.");
            std::process::exit(-2)
        }
    };
    if args.next().is_some() {
        eprintln!("Error: Too many arguments.");
        std::process::exit(-3);
    }
    let mut file = match File::open(path) {
        Ok(file) => file,
        Err(_) => {
            eprintln!("Error: Could not open file.");
            std::process::exit(-4)
        }
    };
    let mut buffer = [0u8; 4096];
    let mut detector = Detector::new(true);
    loop {
        let num_read = match file.read(&mut buffer[..]) {
            Ok(num_read) => num_read,
            // `Read::read` documents `Interrupted` as non-fatal: the read
            // should simply be retried instead of aborting the detection.
            Err(ref error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(_) => {
                eprintln!("Error: Error reading file.");
                std::process::exit(-5)
            }
        };
        // A zero-length read signals end of file; in that case the (empty)
        // slice is fed with `last == true` so the detector can flush.
        let at_eof = num_read == 0;
        match detector.feed(&buffer[..num_read], at_eof) {
            Some(encoding) => {
                println!("{}", encoding.name());
                return;
            }
            None if at_eof => {
                println!("Undecided");
                return;
            }
            None => {}
        }
    }
}

278
third_party/rust/shift_or_euc/src/lib.rs поставляемый Normal file
Просмотреть файл

@ -0,0 +1,278 @@
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![doc(html_root_url = "https://docs.rs/shift_or_euc/0.1.0")]
//! A Japanese legacy encoding detector for detecting between Shift_JIS,
//! EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the
//! encoding is one of those.
//!
//! This detector is generally more accurate (but see below about the failure
//! mode on half-width katakana) and decides much sooner than machine
//! learning-based detectors. To decide EUC-JP, machine learning-based
//! detectors try to gain confidence that the input looks like EUC-JP. To
//! decide EUC-JP, this detector instead looks for two simple rule-based
//! signs of the input not being Shift_JIS.
//!
//! As a consequence of not containing machine learning tables, the binary
//! size footprint that this crate adds on top of
//! [`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny.
//!
//! # Licensing
//!
//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
//!
//! # Principle of Operation
//!
//! The detector is based on two observations:
//!
//! 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or
//! EUC-JP, so encountering such an escape sequence (before non-ASCII has been
//! encountered) can be taken as indication of ISO-2022-JP.
//! 2. When normal (full-width) kana or common kanji encoded as Shift_JIS is
//! decoded as EUC-JP, or vice versa, the result is either an error or
//! half-width katakana, and it's very uncommon for Japanese HTML to have
//! a half-width katakana character before a normal kana or common kanji
//! character. Therefore, if decoding as Shift_JIS results in an error or
//! half-width katakana, the detector decides that the content is EUC-JP, and
//! vice versa.
//!
//! # Failure Modes
//!
//! The detector gives the wrong answer if the text has a half-width katakana
//! character before normal kana or common kanji. Some uncommon kanji are
//! undecidable. (All JIS X 0208 Level 1 kanji are decidable.)
//!
//! The half-width katakana issue is mainly relevant for old 8-bit JIS X
//! 0201-only text files that would decode correctly as Shift_JIS but that the
//! detector detects as EUC-JP.
//!
//! The undecidable kanji issue does not realistically show up when a full
//! document is fed to the detector, because, realistically, in a full
//! document, there is at least one kana or common kanji. It can occur,
//! though, if the detector is only run on a prefix of a document and the
//! prefix only contains the title of the document. It is possible for
//! document title to consist entirely of undecidable kanji. (Indeed,
//! Japanese Wikipedia has articles with such titles.) If the detector is
//! undecided, falling back to Shift_JIS is typically the Web oriented better
//! guess.
use encoding_rs::Decoder;
use encoding_rs::DecoderResult;
use encoding_rs::Encoding;
use encoding_rs::EUC_JP;
use encoding_rs::ISO_2022_JP;
use encoding_rs::SHIFT_JIS;
/// Locates the first byte that is either non-ASCII or the escape byte
/// (0x1B). Returns that byte's index, or `buffer.len()` when the buffer
/// contains neither.
fn find_non_ascii_or_escape(buffer: &[u8]) -> usize {
    // Length of the leading all-ASCII run; an escape byte only matters if it
    // occurs inside that run, i.e. before the first non-ASCII byte.
    let ascii_len = Encoding::ascii_valid_up_to(buffer);
    memchr::memchr(0x1B, &buffer[..ascii_len]).unwrap_or(ascii_len)
}
/// Feeds `decoder` with one byte (if `last` is `false`) or with EOF (if
/// `last` is `true`). `byte` is ignored if `last` is `true`.
///
/// Returns `true` if there was no rejection, or `false` upon rejecting the
/// encoding hypothesis represented by this decoder. The hypothesis is
/// rejected when the decoder reports malformed input or when the decoded
/// UTF-16 code unit is in the half-width katakana range U+FF61..=U+FF9F.
#[inline(always)]
fn feed_decoder(decoder: &mut Decoder, byte: u8, last: bool) -> bool {
    let mut output = [0u16; 1];
    let input = [byte];
    let (result, _read, written) = decoder.decode_to_utf16_without_replacement(
        if last { b"" } else { &input },
        &mut output,
        last,
    );
    match result {
        // `output[0]` is only meaningful when exactly one code unit was
        // written by this call.
        DecoderResult::InputEmpty => match (written, output[0]) {
            (1, 0xFF61..=0xFF9F) => false,
            _ => true,
        },
        DecoderResult::Malformed(_, _) => false,
        // The original code treats running out of output space as
        // impossible when feeding at most one byte into a one-unit buffer.
        DecoderResult::OutputFull => unreachable!(),
    }
}
/// A detector for detecting the character encoding of input on the
/// precondition that the encoding is a Japanese legacy encoding.
///
/// The detector is stateful: bytes are supplied incrementally through
/// `feed`, and the fields below carry the state between calls.
pub struct Detector {
// Decoder used to test the Shift_JIS hypothesis; when it rejects a byte,
// `feed` answers EUC-JP.
shift_jis_decoder: Decoder,
// Decoder used to test the EUC-JP hypothesis; when it rejects a byte,
// `feed` answers Shift_JIS.
euc_jp_decoder: Decoder,
// The byte that followed the escape byte (0x1B), or 0 while no such byte
// has been recorded yet.
second_byte_in_escape: u8,
// `true` once ISO-2022-JP is no longer a candidate: initialised to
// `!allow_2022` and set by `feed` on a non-ASCII byte or on an escape
// sequence that is not one of the recognised ones.
iso_2022_jp_disqualified: bool,
// `true` once an escape byte (0x1B) has been encountered.
escape_seen: bool,
// `true` once `feed` has returned `Some(_)` or has been called with
// `last == true`; calling `feed` again then panics.
finished: bool,
}
impl Detector {
    /// Creates a detector. With `allow_2022` set to `true` the possible
    /// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided; with
    /// `allow_2022` set to `false` ISO-2022-JP is never guessed.
    pub fn new(allow_2022: bool) -> Self {
        Detector {
            shift_jis_decoder: SHIFT_JIS.new_decoder_without_bom_handling(),
            euc_jp_decoder: EUC_JP.new_decoder_without_bom_handling(),
            second_byte_in_escape: 0,
            iso_2022_jp_disqualified: !allow_2022,
            escape_seen: false,
            finished: false,
        }
    }

    /// Feeds one byte (or EOF when `last` is `true`) to both hypothesis
    /// decoders, EUC-JP first. Returns the *other* encoding as soon as one
    /// hypothesis is rejected, or `None` if both decoders accepted the input.
    #[inline(always)]
    fn feed_both_decoders(&mut self, byte: u8, last: bool) -> Option<&'static Encoding> {
        if !feed_decoder(&mut self.euc_jp_decoder, byte, last) {
            return Some(SHIFT_JIS);
        }
        if !feed_decoder(&mut self.shift_jis_decoder, byte, last) {
            return Some(EUC_JP);
        }
        None
    }

    /// Feeds bytes to the detector. When `last` is `true`, the end of the
    /// stream is taken to occur immediately after the end of `buffer`;
    /// otherwise more input is expected. `buffer` may be empty.
    ///
    /// When running the detector only on a prefix of a complete document,
    /// _do not_ pass `last` as `true` after the prefix if the stream as a
    /// whole still contains more content.
    ///
    /// Returns `Some(encoding_rs::SHIFT_JIS)`, `Some(encoding_rs::EUC_JP)` or
    /// `Some(encoding_rs::ISO_2022_JP)` once the detector has made a guess
    /// (the latter only if `true` was passed as `allow_2022` at
    /// construction), and `None` while it is undecided. If `None` is
    /// returned even with `last` set to `true`, falling back to Shift_JIS is
    /// the best guess for Web purposes.
    ///
    /// Do not call again after the method has returned `Some(_)` or after
    /// it has been called with `true` as `last`.
    ///
    /// # Panics
    ///
    /// If called after the method has returned `Some(_)` or after the method
    /// has been called with `true` as `last`.
    pub fn feed(&mut self, buffer: &[u8], last: bool) -> Option<&'static Encoding> {
        assert!(
            !self.finished,
            "Tried to used a detector that has finished."
        );
        // Assume this call finishes the detector; the flag is cleared again
        // at the very end if no early return happened.
        self.finished = true;

        let mut pos = 0;
        if !self.iso_2022_jp_disqualified {
            if !self.escape_seen {
                // Skip ahead to the first byte that can matter.
                pos = find_non_ascii_or_escape(buffer);
            }
            while let Some(&byte) = buffer.get(pos) {
                if byte > 0x7F {
                    // Non-ASCII rules out ISO-2022-JP.
                    self.iso_2022_jp_disqualified = true;
                    break;
                }
                if !self.escape_seen {
                    if byte == 0x1B {
                        self.escape_seen = true;
                    }
                    pos += 1;
                    continue;
                }
                if self.second_byte_in_escape == 0 {
                    // First byte after the escape byte: remember it and wait
                    // for the byte that completes the sequence.
                    self.second_byte_in_escape = byte;
                    pos += 1;
                    continue;
                }
                match (self.second_byte_in_escape, byte) {
                    (0x28, 0x42) | (0x28, 0x4A) | (0x28, 0x49) | (0x24, 0x40) | (0x24, 0x42) => {
                        return Some(ISO_2022_JP);
                    }
                    _ => {
                        // An unrecognised escape sequence: give up on
                        // ISO-2022-JP and resume with the two decoders from
                        // this byte onwards.
                        self.iso_2022_jp_disqualified = true;
                        break;
                    }
                }
            }
        }

        for &byte in &buffer[pos..] {
            if let Some(encoding) = self.feed_both_decoders(byte, false) {
                return Some(encoding);
            }
        }

        if last {
            // Signal EOF to both decoders; `finished` deliberately stays set.
            return self.feed_both_decoders(0, true);
        }

        self.finished = false;
        None
    }
}
// Any copyright to the test code below this comment is dedicated to the
// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh detector (ISO-2022-JP allowed) over `input` as a
    /// complete stream and returns its guess.
    fn detect_complete(input: &[u8]) -> Option<&'static Encoding> {
        Detector::new(true).feed(input, true)
    }

    #[test]
    fn test_iso_2022_jp() {
        // The escape sequence decides before the trailing 0xFF is looked at.
        assert_eq!(
            detect_complete(b"abc\x1B\x28\x42\xFF"),
            Some(ISO_2022_JP)
        );
    }

    #[test]
    fn test_error_precedence() {
        // The EUC-JP hypothesis is checked first, so its rejection wins.
        assert_eq!(detect_complete(b"abc\xFF"), Some(SHIFT_JIS));
    }

    #[test]
    fn test_invalid_euc_jp() {
        assert_eq!(detect_complete(b"abc\x81\x40"), Some(SHIFT_JIS));
    }

    #[test]
    fn test_invalid_shift_jis() {
        assert_eq!(detect_complete(b"abc\xEB\xA8"), Some(EUC_JP));
    }

    #[test]
    fn test_invalid_shift_jis_before_invalid_euc_jp() {
        assert_eq!(detect_complete(b"abc\xEB\xA8\x81\x40"), Some(EUC_JP));
    }

    #[test]
    fn test_undecided() {
        // Pure ASCII never decides, and the detector stays usable as long as
        // `last` is `false`.
        let mut detector = Detector::new(true);
        for _ in 0..2 {
            assert_eq!(detector.feed(b"abc", false), None);
        }
    }
}

1
third_party/rust/shift_or_euc_c/.cargo-checksum.json поставляемый Normal file
Просмотреть файл

@ -0,0 +1 @@
{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"342e5345f4fb433b89f397b07e4e7162376b30cbbc1d6f6ccb11523116e6ed6b","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"a323f1f4537bc7b3f9b3b216c8ac5041b83aa0321f5349a52627aade947c6272","include/shift_or_euc.h":"47c3b9832cb7eb8995aa37dcc2e76be7d4f5c7b3fa6b43135e579831ab449cd8","src/lib.rs":"cab1898dd6724e0a0324a1e44f6348c107f13916da8873dba69c70dbc95ba9cd"},"package":"c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"}

38
third_party/rust/shift_or_euc_c/CONTRIBUTING.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,38 @@
If you send a pull request / patch, please observe the following.
## Licensing
Since this crate is dual-licensed,
[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
is considered to apply in the sense of Contributions being automatically
under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
That is, by the act of offering a Contribution, you place your Contribution
under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
file. Please do not contribute if you aren't willing or allowed to license your
contributions in this manner.
You are encouraged to dedicate test code that you contribute to the Public
Domain using the CC0 dedication. If you contribute test code that is not
dedicated to the Public Domain, please be sure not to put it in a part of
source code that the comments designate as being dedicated to the Public
Domain.
## Copyright Notices
If you require the addition of your copyright notice, it's up to you to edit in
your notice as part of your Contribution. Not adding a copyright notice is
taken as a waiver of copyright notice.
## Compatibility with Stable Rust
Please ensure that your Contribution compiles with the latest stable-channel
rustc.
## rustfmt
The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
use that version or avoid using `rustfmt` (so as not to reformat all the code).
## Unit tests
Please ensure that `cargo test` succeeds.

9
third_party/rust/shift_or_euc_c/COPYRIGHT поставляемый Normal file
Просмотреть файл

@ -0,0 +1,9 @@
shift_or_euc is copyright 2018 Mozilla Foundation.
Licensed under the Apache License, Version 2.0
<LICENSE-APACHE or
https://www.apache.org/licenses/LICENSE-2.0> or the MIT
license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
at your option. All files in the project carrying such
notice may not be copied, modified, or distributed except
according to those terms.

30
third_party/rust/shift_or_euc_c/Cargo.toml поставляемый Normal file
Просмотреть файл

@ -0,0 +1,30 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "shift_or_euc_c"
version = "0.1.0"
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
description = "C API for shift_or_euc"
homepage = "https://docs.rs/shift_or_euc_c/"
documentation = "https://docs.rs/shift_or_euc_c/"
readme = "README.md"
keywords = ["encoding", "web", "charset"]
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
license = "MIT/Apache-2.0"
repository = "https://github.com/hsivonen/shift_or_euc_c"
[dependencies.encoding_rs]
version = "0.8.17"
[dependencies.shift_or_euc]
version = "0.1.0"

202
third_party/rust/shift_or_euc_c/LICENSE-APACHE поставляемый Normal file
Просмотреть файл

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

25
third_party/rust/shift_or_euc_c/LICENSE-MIT поставляемый Normal file
Просмотреть файл

@ -0,0 +1,25 @@
Copyright (c) 2018 Mozilla Foundation
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

13
third_party/rust/shift_or_euc_c/README.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,13 @@
# shift_or_euc_c
[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT)
C API for [`shift_or_euc`](https://docs.rs/crate/shift_or_euc).
## Documentation
[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc_c)
## Licensing
See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT).

88
third_party/rust/shift_or_euc_c/include/shift_or_euc.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,88 @@
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#ifndef shift_or_euc_h
#define shift_or_euc_h

#ifdef __cplusplus
extern "C" {
#endif

// <stddef.h> is included explicitly because the API below uses `size_t`;
// this header should not rely on another header providing it transitively.
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include "encoding_rs.h"

// The detector type name can be overridden by defining
// SHIFT_OR_EUC_DETECTOR before including this header. In C, an opaque
// struct typedef is supplied for the default name.
#ifndef SHIFT_OR_EUC_DETECTOR
#define SHIFT_OR_EUC_DETECTOR Detector
#ifndef __cplusplus
typedef struct Detector_ Detector;
#endif
#endif

/// Instantiates the detector. If `allow_2022` is `true` the possible
/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If
/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP,
/// and undecided.
///
/// The instantiated detector must be freed after use using
/// `shift_or_euc_detector_free`.
SHIFT_OR_EUC_DETECTOR* shift_or_euc_detector_new(bool allow_2022);

/// Deallocates a detector obtained from `shift_or_euc_detector_new`.
void shift_or_euc_detector_free(SHIFT_OR_EUC_DETECTOR* detector);

/// Feeds bytes to the detector. If `last` is `true` the end of the stream
/// is considered to occur immediately after the end of `buffer`.
/// Otherwise, the stream is expected to continue. `buffer_len` may be zero.
/// `buffer` must not be `NULL` but may be non-dereferenceable when
/// `buffer_len` is zero.
///
/// If you're running the detector only on a prefix of a complete
/// document, _do not_ pass `last` as `true` after the prefix if the
/// stream as a whole still contains more content.
///
/// Returns `SHIFT_JIS_ENCODING` if the detector guessed
/// Shift_JIS. Returns `EUC_JP_ENCODING` if the detector
/// guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the
/// detector guessed ISO-2022-JP (only possible if `true` was passed as
/// `allow_2022` when instantiating the detector). Returns `NULL` if the
/// detector is undecided. If `NULL` is returned even when passing `true`
/// as `last`, falling back to Shift_JIS is the best guess for Web
/// purposes.
///
/// Do not call again after the function has returned non-`NULL` or after
/// the function has been called with `true` as `last`.
///
/// # Panics
///
/// If called after the function has returned non-`NULL` or after the
/// function has been called with `true` as `last`.
///
/// # Undefined Behavior
///
/// UB ensues if
///
/// * `detector` does not point to a detector obtained from
///   `shift_or_euc_detector_new` but not yet freed with
///   `shift_or_euc_detector_free`.
/// * `buffer` is `NULL`.
/// * `buffer` and `buffer_len` don't designate a range of memory
///   valid for reading.
ENCODING_RS_ENCODING const* shift_or_euc_detector_feed(
    SHIFT_OR_EUC_DETECTOR* detector,
    uint8_t const* buffer,
    size_t buffer_len,
    bool last
);

#ifdef __cplusplus
}
#endif

#endif // shift_or_euc_h

94
third_party/rust/shift_or_euc_c/src/lib.rs поставляемый Normal file
Просмотреть файл

@ -0,0 +1,94 @@
// Copyright 2018 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![doc(html_root_url = "https://docs.rs/shift_or_euc_c/0.1.0")]
//! C API for [`shift_or_euc`](https://docs.rs/shift_or_euc/)
//!
//! # Panics
//!
//! This crate is designed to be used only in a `panic=abort` scenario.
//! Panic propagation across FFI is not handled!
//!
//! # Licensing
//!
//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT).
use encoding_rs::Encoding;
use shift_or_euc::*;
/// Creates a heap-allocated detector and hands ownership of it to the
/// caller as a raw pointer.
///
/// With `allow_2022` set to `true` the possible guesses are Shift_JIS,
/// EUC-JP, ISO-2022-JP, and undecided; with `false` they are Shift_JIS,
/// EUC-JP, and undecided.
///
/// The returned detector must be released after use with
/// `shift_or_euc_detector_free`.
#[no_mangle]
pub unsafe extern "C" fn shift_or_euc_detector_new(allow_2022: bool) -> *mut Detector {
    let detector = Box::new(Detector::new(allow_2022));
    Box::into_raw(detector)
}
/// Deallocates a detector obtained from `shift_or_euc_detector_new`.
///
/// Passing `NULL` is a no-op (mirroring C `free`). Any other pointer must
/// have been returned by `shift_or_euc_detector_new` and must not have been
/// freed already.
#[no_mangle]
pub unsafe extern "C" fn shift_or_euc_detector_free(detector: *mut Detector) {
    // `Box::from_raw` on a null pointer would be undefined behavior, so
    // guard against it explicitly.
    if detector.is_null() {
        return;
    }
    // Reconstitute the box so that dropping it runs the destructor and
    // releases the allocation made in `shift_or_euc_detector_new`.
    drop(Box::from_raw(detector));
}
/// Feeds `buffer_len` bytes starting at `buffer` to the detector. When
/// `last` is `true`, the end of the stream is taken to occur immediately
/// after the end of the buffer; otherwise more input is expected.
/// `buffer_len` may be zero. `buffer` must not be `NULL`, but it does not
/// need to be dereferenceable when `buffer_len` is zero.
///
/// When running the detector only on a prefix of a complete document,
/// _do not_ pass `last` as `true` after the prefix if the stream as a whole
/// still contains more content.
///
/// Returns `SHIFT_JIS_ENCODING`, `EUC_JP_ENCODING` or
/// `ISO_2022_JP_ENCODING` once the detector has made a guess (the latter
/// only if `true` was passed as `allow_2022` when instantiating the
/// detector), and `NULL` while the detector is undecided. If `NULL` is
/// returned even when `last` is `true`, falling back to Shift_JIS is the
/// best guess for Web purposes.
///
/// Do not call again after the function has returned non-`NULL` or after
/// the function has been called with `true` as `last`.
///
/// # Panics
///
/// If called after the function has returned non-`NULL` or after the
/// function has been called with `true` as `last`.
///
/// # Undefined Behavior
///
/// UB ensues if
///
/// * `detector` does not point to a detector obtained from
///   `shift_or_euc_detector_new` but not yet freed with
///   `shift_or_euc_detector_free`.
/// * `buffer` is `NULL`.
/// * `buffer` and `buffer_len` don't designate a range of memory
///   valid for reading.
#[no_mangle]
pub unsafe extern "C" fn shift_or_euc_detector_feed(
    detector: *mut Detector,
    buffer: *const u8,
    buffer_len: usize,
    last: bool,
) -> *const Encoding {
    let bytes = ::std::slice::from_raw_parts(buffer, buffer_len);
    // An undecided detector (`None`) is reported to C as a null pointer.
    match (*detector).feed(bytes, last) {
        Some(encoding) => encoding as *const Encoding,
        None => ::std::ptr::null(),
    }
}

Просмотреть файл

@ -245,6 +245,8 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) {
this._mayEnableCharacterEncodingMenu = null;
this._charsetAutodetected = false;
this._contentPrincipal = null;
this._csp = null;
@ -619,6 +621,16 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) {
}
}
get charsetAutodetected() {
return this.isRemoteBrowser ? this._charsetAutodetected : this.docShell.charsetAutodetected;
}
set charsetAutodetected(aAutodetected) {
if (this.isRemoteBrowser) {
this._charsetAutodetected = aAutodetected;
}
}
get contentPrincipal() {
return this.isRemoteBrowser ? this._contentPrincipal : this.contentDocument.nodePrincipal;
}
@ -1709,6 +1721,7 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) {
"_contentTitle",
"_characterSet",
"_mayEnableCharacterEncodingMenu",
"_charsetAutodetected",
"_contentPrincipal",
"_imageDocument",
"_fullZoom",

Просмотреть файл

@ -38,6 +38,7 @@ cert_storage = { path = "../../../../security/manager/ssl/cert_storage", optiona
bitsdownload = { path = "../../../components/bitsdownload", optional = true }
storage = { path = "../../../../storage/rust" }
bookmark_sync = { path = "../../../components/places/bookmark_sync", optional = true }
shift_or_euc_c = "0.1.0"
[build-dependencies]
rustc_version = "0.2"

Просмотреть файл

@ -44,6 +44,7 @@ extern crate bitsdownload;
extern crate storage;
#[cfg(feature = "moz_places")]
extern crate bookmark_sync;
extern crate shift_or_euc_c;
extern crate arrayvec;

Просмотреть файл

@ -31,8 +31,6 @@ charsetMenuAutodet = Auto-Detect
charsetMenuAutodet.key = D
charsetMenuAutodet.off = (off)
charsetMenuAutodet.off.key = o
charsetMenuAutodet.ja = Japanese
charsetMenuAutodet.ja.key = J
charsetMenuAutodet.ru = Russian
charsetMenuAutodet.ru.key = R
charsetMenuAutodet.uk = Ukrainian
@ -104,25 +102,8 @@ windows-1255 = Hebrew
# sorts right after that one in the collation order for your locale.
ISO-8859-8 = Hebrew, Visual
# Japanese
Shift_JIS.key = J
Shift_JIS = Japanese (Shift_JIS)
EUC-JP.key = p
EUC-JP = Japanese (EUC-JP)
ISO-2022-JP.key = n
ISO-2022-JP = Japanese (ISO-2022-JP)
# UI string in anticipation of bug 1543077; deliberately not in use yet
# LOCALIZATION NOTE (Japanese.key): If taken into use, this string will appear
# instead of the string for Shift_JIS.key, so the use of the same
# accelerator is deliberate.
# Japanese (NOT AN ENCODING NAME)
Japanese.key = J
# LOCALIZATION NOTE (Japanese): If taken into use, this string will appear
# as a single item in place of the strings for the three items Shift_JIS,
# EUC-JP, and ISO-2022-JP, so this string does not need to make sense together
# with those strings and should be translated the way those were
# but omitting the part in parentheses.
Japanese = Japanese
# Korean

Просмотреть файл

@ -16,18 +16,16 @@ ChromeUtils.defineModuleGetter(this, "Deprecated",
const kAutoDetectors = [
["off", ""],
["ja", "ja_parallel_state_machine"],
["ru", "ruprob"],
["uk", "ukprob"],
];
/**
* This set contains encodings that are in the Encoding Standard, except:
* - XSS-dangerous encodings (except ISO-2022-JP which is assumed to be
* too common not to be included).
* - Japanese encodings are represented by one autodetection item
* - x-user-defined, which practically never makes sense as an end-user-chosen
* override.
* - Encodings that IE11 doesn't have in its correspoding menu.
* - Encodings that IE11 doesn't have in its corresponding menu.
*/
const kEncodings = new Set([
// Globally relevant
@ -60,10 +58,8 @@ const kEncodings = new Set([
// Hebrew
"windows-1255",
"ISO-8859-8",
// Japanese
"Shift_JIS",
"EUC-JP",
"ISO-2022-JP",
// Japanese (NOT AN ENCODING NAME)
"Japanese",
// Korean
"EUC-KR",
// Thai
@ -95,8 +91,7 @@ function CharsetComparator(a, b) {
// happens to make the less frequently-used items first.
let titleA = a.label.replace(/\(.*/, "") + b.value;
let titleB = b.label.replace(/\(.*/, "") + a.value;
// Secondarily reverse sort by encoding name to sort "windows" or
// "shift_jis" first.
// Secondarily reverse sort by encoding name to sort "windows" first.
return titleA.localeCompare(titleB) || b.value.localeCompare(a.value);
}
@ -239,7 +234,17 @@ var CharsetMenu = {
* For substantially similar encodings, treat two encodings as the same
* for the purpose of the check mark.
*/
foldCharset(charset) {
foldCharset(charset, isAutodetected) {
if (isAutodetected) {
switch (charset) {
case "Shift_JIS":
case "EUC-JP":
case "ISO-2022-JP":
return "Japanese";
default:
// fall through
}
}
switch (charset) {
case "ISO-8859-8-I":
return "windows-1255";
@ -252,8 +257,11 @@ var CharsetMenu = {
}
},
/**
* This method is for comm-central callers only.
*/
update(parent, charset) {
let menuitem = parent.getElementsByAttribute("charset", this.foldCharset(charset)).item(0);
let menuitem = parent.getElementsByAttribute("charset", this.foldCharset(charset, false)).item(0);
if (menuitem) {
menuitem.setAttribute("checked", "true");
}

Просмотреть файл

@ -196,6 +196,7 @@ class RemoteWebProgressManager {
if (json.charset) {
this._browser._characterSet = json.charset;
this._browser._mayEnableCharacterEncodingMenu = json.mayEnableCharacterEncodingMenu;
this._browser._charsetAutodetected = json.charsetAutodetected;
}
}

Просмотреть файл

@ -116,6 +116,7 @@ class WebProgressChild {
json.title = this.mm.content.document.title;
json.charset = this.mm.content.document.characterSet;
json.mayEnableCharacterEncodingMenu = this.mm.docShell.mayEnableCharacterEncodingMenu;
json.charsetAutodetected = this.mm.docShell.charsetAutodetected;
json.principal = this.mm.content.document.nodePrincipal;
let csp = this.mm.content.document.csp;
json.csp = E10SUtils.serializeCSP(csp);