2001-09-26 04:40:45 +04:00
|
|
|
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
2012-05-21 15:12:37 +04:00
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
1999-07-14 19:17:24 +04:00
|
|
|
#ifndef nsCyrillicDetector_h__
|
|
|
|
#define nsCyrillicDetector_h__
|
|
|
|
|
2001-04-12 06:06:02 +04:00
|
|
|
#include "nsCyrillicClass.h"
|
1999-07-14 19:17:24 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Class ID {2002F781-3960-11d3-B3C3-00805F8A6670}.
// NOTE(review): presumably the CID under which nsRUProbDetector is
// registered; the registration itself is not visible in this header.
#define NS_RU_PROBDETECTOR_CID                          \
  {                                                     \
    0x2002f781, 0x3960, 0x11d3,                         \
    { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 }   \
  }
|
|
|
|
|
|
|
|
|
|
|
|
// Class ID {2002F782-3960-11d3-B3C3-00805F8A6670}.
// NOTE(review): presumably the CID under which nsUKProbDetector is
// registered; the registration itself is not visible in this header.
#define NS_UK_PROBDETECTOR_CID                          \
  {                                                     \
    0x2002f782, 0x3960, 0x11d3,                         \
    { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 }   \
  }
|
|
|
|
|
|
|
|
// Class ID {2002F783-3960-11d3-B3C3-00805F8A6670}.
// NOTE(review): presumably the CID under which nsRUStringProbDetector is
// registered; the registration itself is not visible in this header.
#define NS_RU_STRING_PROBDETECTOR_CID                   \
  {                                                     \
    0x2002f783, 0x3960, 0x11d3,                         \
    { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 }   \
  }
|
|
|
|
|
|
|
|
// Class ID {2002F784-3960-11d3-B3C3-00805F8A6670}.
// NOTE(review): presumably the CID under which nsUKStringProbDetector is
// registered; the registration itself is not visible in this header.
#define NS_UK_STRING_PROBDETECTOR_CID                   \
  {                                                     \
    0x2002f784, 0x3960, 0x11d3,                         \
    { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 }   \
  }
|
|
|
|
|
2012-08-22 19:56:38 +04:00
|
|
|
static const uint8_t *gCyrillicCls[5] =
|
2001-04-12 06:06:02 +04:00
|
|
|
{
|
|
|
|
CP1251Map,
|
|
|
|
KOI8Map,
|
|
|
|
ISO88595Map,
|
|
|
|
MacCyrillicMap,
|
|
|
|
IBM866Map
|
|
|
|
};
|
|
|
|
|
|
|
|
// Charset names passed as aCharsets by the Russian detectors
// (nsRUProbDetector, nsRUStringProbDetector). Differs from gUkrainian only
// in entry [1].
static const char * gRussian[5] = {
  "windows-1251",    // [0]
  "KOI8-R",          // [1]
  "ISO-8859-5",      // [2]
  "x-mac-cyrillic",  // [3]
  "IBM866"           // [4]
};
|
|
|
|
|
|
|
|
// Charset names passed as aCharsets by the Ukrainian detectors
// (nsUKProbDetector, nsUKStringProbDetector). Differs from gRussian only
// in entry [1].
static const char * gUkrainian[5] = {
  "windows-1251",    // [0]
  "KOI8-U",          // [1]
  "ISO-8859-5",      // [2]
  "x-mac-cyrillic",  // [3]
  "IBM866"           // [4]
};
|
|
|
|
|
|
|
|
// Number of slots in nsCyrillicDetector's per-charset state arrays (mProb,
// mLastCls). Equal to the number of entries in gCyrillicCls, gRussian and
// gUkrainian.
#define NUM_CYR_CHARSET 5
|
|
|
|
|
|
|
|
class nsCyrillicDetector
|
|
|
|
{
|
|
|
|
public:
|
2012-08-22 19:56:38 +04:00
|
|
|
nsCyrillicDetector(uint8_t aItems,
|
|
|
|
const uint8_t ** aCyrillicClass,
|
2001-04-12 06:06:02 +04:00
|
|
|
const char **aCharsets) {
|
|
|
|
mItems = aItems;
|
|
|
|
mCyrillicClass = aCyrillicClass;
|
|
|
|
mCharsets = aCharsets;
|
2012-08-22 19:56:38 +04:00
|
|
|
for(unsigned i=0;i<mItems;i++)
|
2001-04-12 06:06:02 +04:00
|
|
|
mProb[i] = mLastCls[i] =0;
|
2011-10-17 18:59:28 +04:00
|
|
|
mDone = false;
|
2007-04-23 18:21:53 +04:00
|
|
|
}
|
|
|
|
virtual ~nsCyrillicDetector() {}
|
2012-08-22 19:56:38 +04:00
|
|
|
virtual void HandleData(const char* aBuf, uint32_t aLen);
|
2001-04-12 06:06:02 +04:00
|
|
|
virtual void DataEnd();
|
|
|
|
protected:
|
|
|
|
virtual void Report(const char* aCharset) = 0;
|
2011-09-29 10:19:26 +04:00
|
|
|
bool mDone;
|
2001-04-12 06:06:02 +04:00
|
|
|
|
|
|
|
private:
|
2012-08-22 19:56:38 +04:00
|
|
|
uint8_t mItems;
|
|
|
|
const uint8_t ** mCyrillicClass;
|
2001-04-12 06:06:02 +04:00
|
|
|
const char** mCharsets;
|
2012-08-22 19:56:38 +04:00
|
|
|
uint32_t mProb[NUM_CYR_CHARSET];
|
|
|
|
uint8_t mLastCls[NUM_CYR_CHARSET];
|
2001-04-12 06:06:02 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
class nsCyrXPCOMDetector :
|
|
|
|
public nsCyrillicDetector,
|
|
|
|
public nsICharsetDetector
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
// nsISupports interface
|
|
|
|
NS_DECL_ISUPPORTS
|
2012-08-22 19:56:38 +04:00
|
|
|
nsCyrXPCOMDetector(uint8_t aItems,
|
|
|
|
const uint8_t ** aCyrillicClass,
|
2001-04-12 06:06:02 +04:00
|
|
|
const char **aCharsets);
|
2015-03-21 19:28:04 +03:00
|
|
|
NS_IMETHOD Init(nsICharsetDetectionObserver* aObserver) override;
|
|
|
|
NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen, bool *oDontFeedMe) override;
|
|
|
|
NS_IMETHOD Done() override;
|
2001-04-12 06:06:02 +04:00
|
|
|
protected:
|
2014-06-24 02:40:02 +04:00
|
|
|
virtual ~nsCyrXPCOMDetector();
|
2015-03-21 19:28:04 +03:00
|
|
|
virtual void Report(const char* aCharset) override;
|
2001-04-12 06:06:02 +04:00
|
|
|
private:
|
|
|
|
nsCOMPtr<nsICharsetDetectionObserver> mObserver;
|
|
|
|
};
|
|
|
|
|
|
|
|
class nsCyrXPCOMStringDetector :
|
|
|
|
public nsCyrillicDetector,
|
|
|
|
public nsIStringCharsetDetector
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
// nsISupports interface
|
|
|
|
NS_DECL_ISUPPORTS
|
2012-08-22 19:56:38 +04:00
|
|
|
nsCyrXPCOMStringDetector(uint8_t aItems,
|
|
|
|
const uint8_t ** aCyrillicClass,
|
2001-04-12 06:06:02 +04:00
|
|
|
const char **aCharsets);
|
2012-08-22 19:56:38 +04:00
|
|
|
NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen,
|
2015-03-21 19:28:04 +03:00
|
|
|
const char** oCharset, nsDetectionConfident &oConf) override;
|
2001-04-12 06:06:02 +04:00
|
|
|
protected:
|
2014-06-24 02:40:02 +04:00
|
|
|
virtual ~nsCyrXPCOMStringDetector();
|
2015-03-21 19:28:04 +03:00
|
|
|
virtual void Report(const char* aCharset) override;
|
2001-04-12 06:06:02 +04:00
|
|
|
private:
|
|
|
|
nsCOMPtr<nsICharsetDetectionObserver> mObserver;
|
|
|
|
const char* mResult;
|
|
|
|
};
|
|
|
|
|
|
|
|
class nsRUProbDetector : public nsCyrXPCOMDetector
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
nsRUProbDetector()
|
2007-04-23 18:21:53 +04:00
|
|
|
: nsCyrXPCOMDetector(5, gCyrillicCls, gRussian) {}
|
2001-04-12 06:06:02 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
class nsRUStringProbDetector : public nsCyrXPCOMStringDetector
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
nsRUStringProbDetector()
|
2007-04-23 18:21:53 +04:00
|
|
|
: nsCyrXPCOMStringDetector(5, gCyrillicCls, gRussian) {}
|
2001-04-12 06:06:02 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
class nsUKProbDetector : public nsCyrXPCOMDetector
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
nsUKProbDetector()
|
2007-04-23 18:21:53 +04:00
|
|
|
: nsCyrXPCOMDetector(5, gCyrillicCls, gUkrainian) {}
|
2001-04-12 06:06:02 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
class nsUKStringProbDetector : public nsCyrXPCOMStringDetector
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
nsUKStringProbDetector()
|
2007-04-23 18:21:53 +04:00
|
|
|
: nsCyrXPCOMStringDetector(5, gCyrillicCls, gUkrainian) {}
|
2001-04-12 06:06:02 +04:00
|
|
|
};
|
1999-07-14 19:17:24 +04:00
|
|
|
|
|
|
|
#endif
|