зеркало из https://github.com/mozilla/gecko-dev.git
some performance improvements for header parsing
This commit is contained in:
Родитель
d0b8911644
Коммит
9adfdc2ef9
|
@ -568,13 +568,13 @@ static PRBool intlmime_only_ascii_str(const char *s)
|
|||
|
||||
static unsigned char * utf8_nextchar(unsigned char *str)
|
||||
{
|
||||
int len = PL_strlen((char *) str);
|
||||
if (*str < 128) {
|
||||
return (str+1);
|
||||
}
|
||||
int len = PL_strlen((char *) str);
|
||||
// RFC 2279 defines more than 3 bytes sequences (0xF0, 0xF8, 0xFC),
|
||||
// but I think we won't encounter those cases as long as we're supporting UCS-2 and no surrogate.
|
||||
else if ((len >= 3) && (*str >= 0xE0)) {
|
||||
if ((len >= 3) && (*str >= 0xE0)) {
|
||||
return (str+3);
|
||||
}
|
||||
else if ((len >= 2) && (*str >= 0xC0)) {
|
||||
|
@ -1268,41 +1268,6 @@ static PRInt32 INTL_ConvertFromUnicode(const char* to_charset, const void* uniBu
|
|||
}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class MimeCharsetConverterClass {
|
||||
public:
|
||||
MimeCharsetConverterClass();
|
||||
virtual ~MimeCharsetConverterClass();
|
||||
|
||||
// Initialize converters for charsets, fails if converter not available.
|
||||
//
|
||||
PRInt32 Initialize(const char* from_charset, const char* to_charset,
|
||||
const PRBool autoDetect=PR_FALSE, const PRInt32 maxNumCharsDetect=-1);
|
||||
|
||||
// Converts input buffer or duplicates input if converters not available (and returns 0).
|
||||
// Also duplicates input if convertion not needed.
|
||||
// C string is generated for converted string.
|
||||
PRInt32 Convert(const char* inBuffer, const PRInt32 inLength,
|
||||
char** outBuffer, PRInt32* outLength,
|
||||
PRInt32* numUnConverted);
|
||||
|
||||
protected:
|
||||
nsIUnicodeDecoder * GetUnicodeDecoder() {return (mAutoDetect && NULL != mDecoderDetected) ? mDecoderDetected : mDecoder;}
|
||||
nsIUnicodeEncoder * GetUnicodeEncoder() {return mEncoder;}
|
||||
PRBool NeedCharsetConversion(const nsString& from_charset, const nsString& to_charset);
|
||||
|
||||
private:
|
||||
nsIUnicodeDecoder *mDecoder; // decoder (convert to unicode)
|
||||
nsIUnicodeEncoder *mEncoder; // encoder (convert from unicode)
|
||||
nsIUnicodeDecoder *mDecoderDetected; // decoder of detected charset (after when auto detection succeeded)
|
||||
PRInt32 mMaxNumCharsDetect; // maximum number of characters in bytes to abort auto detection
|
||||
// (-1 for no limit)
|
||||
PRInt32 mNumChars; // accumulated number of characters converted in bytes
|
||||
PRBool mAutoDetect; // true if apply auto detection
|
||||
nsString mInputCharset; // input charset for auto detection hint as well as need conversion check
|
||||
nsString mOutputCharset; // output charset for need conversion check
|
||||
nsIStringCharsetDetector *mDetector; // charset detector
|
||||
};
|
||||
|
||||
MimeCharsetConverterClass::MimeCharsetConverterClass()
|
||||
{
|
||||
mDecoder = NULL;
|
||||
|
@ -1583,16 +1548,17 @@ PRInt32 MIME_ConvertCharset(const PRBool autoDetection, const char* from_charset
|
|||
const char* inBuffer, const PRInt32 inLength, char** outBuffer, PRInt32* outLength,
|
||||
PRInt32* numUnConverted)
|
||||
{
|
||||
char srcCharset[kMAX_CSNAME+1], dstCharset[kMAX_CSNAME+1];
|
||||
// char srcCharset[kMAX_CSNAME+1], dstCharset[kMAX_CSNAME+1];
|
||||
MimeCharsetConverterClass aMimeCharsetConverterClass;
|
||||
PRInt32 res;
|
||||
|
||||
srcCharset[0] = '\0';
|
||||
dstCharset[0] = '\0';
|
||||
PL_strcpy(srcCharset, PL_strcasecmp(from_charset, "us-ascii") ? (char *) from_charset : "iso-8859-1");
|
||||
PL_strcpy(dstCharset, PL_strcasecmp(to_charset, "us-ascii") ? (char *) to_charset : "iso-8859-1");
|
||||
// commenting out per Naoki's instructions.
|
||||
// srcCharset[0] = '\0';
|
||||
// dstCharset[0] = '\0';
|
||||
// PL_strcpy(srcCharset, PL_strcasecmp(from_charset, "us-ascii") ? (char *) from_charset : "iso-8859-1");
|
||||
// PL_strcpy(dstCharset, PL_strcasecmp(from_charset, "us-ascii") ? (char *) to_charset : "iso-8859-1");
|
||||
|
||||
res = aMimeCharsetConverterClass.Initialize(srcCharset, dstCharset, autoDetection, -1);
|
||||
res = aMimeCharsetConverterClass.Initialize(from_charset, from_charset, autoDetection, -1);
|
||||
|
||||
if (res != -1) {
|
||||
res = aMimeCharsetConverterClass.Convert(inBuffer, inLength, outBuffer, outLength, NULL);
|
||||
|
|
|
@ -15,7 +15,10 @@
|
|||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
#ifndef _COMI18N_LOADED_H_
|
||||
#define _COMI18N_LOADED_H_
|
||||
|
||||
#include "msgCore.h"
|
||||
|
||||
#ifndef kMIME_ENCODED_WORD_SIZE
|
||||
#define kMIME_ENCODED_WORD_SIZE 75
|
||||
|
@ -25,7 +28,47 @@
|
|||
#define kMAX_CSNAME 64
|
||||
#endif
|
||||
|
||||
class nsIUnicodeDecoder;
|
||||
class nsIUnicodeEncoder;
|
||||
class nsIStringCharsetDetector;
|
||||
|
||||
class MimeCharsetConverterClass {
|
||||
public:
|
||||
MimeCharsetConverterClass();
|
||||
virtual ~MimeCharsetConverterClass();
|
||||
|
||||
// Initialize converters for charsets, fails if converter not available.
|
||||
//
|
||||
PRInt32 Initialize(const char* from_charset, const char* to_charset,
|
||||
const PRBool autoDetect=PR_FALSE, const PRInt32 maxNumCharsDetect=-1);
|
||||
|
||||
// Converts input buffer or duplicates input if converters not available (and returns 0).
|
||||
// Also duplicates input if convertion not needed.
|
||||
// C string is generated for converted string.
|
||||
PRInt32 Convert(const char* inBuffer, const PRInt32 inLength,
|
||||
char** outBuffer, PRInt32* outLength,
|
||||
PRInt32* numUnConverted);
|
||||
|
||||
protected:
|
||||
nsIUnicodeDecoder * GetUnicodeDecoder() {return (mAutoDetect && NULL != mDecoderDetected) ? mDecoderDetected : mDecoder;}
|
||||
nsIUnicodeEncoder * GetUnicodeEncoder() {return mEncoder;}
|
||||
PRBool NeedCharsetConversion(const nsString& from_charset, const nsString& to_charset);
|
||||
|
||||
private:
|
||||
nsIUnicodeDecoder *mDecoder; // decoder (convert to unicode)
|
||||
nsIUnicodeEncoder *mEncoder; // encoder (convert from unicode)
|
||||
nsIUnicodeDecoder *mDecoderDetected; // decoder of detected charset (after when auto detection succeeded)
|
||||
PRInt32 mMaxNumCharsDetect; // maximum number of characters in bytes to abort auto detection
|
||||
// (-1 for no limit)
|
||||
PRInt32 mNumChars; // accumulated number of characters converted in bytes
|
||||
PRBool mAutoDetect; // true if apply auto detection
|
||||
nsString mInputCharset; // input charset for auto detection hint as well as need conversion check
|
||||
nsString mOutputCharset; // output charset for need conversion check
|
||||
nsIStringCharsetDetector *mDetector; // charset detector
|
||||
};
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
@ -145,3 +188,6 @@ char *INTL_EncodeMimePartIIStr_VarLen(char *subject, PRInt16 wincsid, PRBool bUs
|
|||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif // _COMI18N_LOADED_H_
|
||||
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
/* Copy the bytes between _S and NextChar_UTF8(_S) (presumably one UTF-8
   character) to _D and advance _D past them. When _S is null or points at a
   0 byte, a single 0 byte is stored at _D instead. _D must be a modifiable
   pointer lvalue. Arguments are parenthesized so that non-trivial
   expressions expand safely. */
#define COPY_CHAR(_D,_S) do { if (!(_S) || !*(_S)) { *(_D)++ = 0; }\
                              else { int _LEN = NextChar_UTF8((char *)(_S)) - (_S);\
                                     nsCRT::memcpy((_D),(_S),_LEN); (_D) += _LEN; } } while (0)
/* Disabled variant of NEXT_CHAR with an inline single-byte shortcut: */
//#define NEXT_CHAR(_STR) (_STR = (* (char *) _STR < 128) ? (char *) _STR + 1 : NextChar_UTF8((char *)_STR))
/* Advance _STR to whatever NextChar_UTF8 returns for it. _STR must be a
   modifiable pointer lvalue. */
#define NEXT_CHAR(_STR) ((_STR) = NextChar_UTF8((char *)(_STR)))
/* Move _E backwards over trailing IS_SPACE characters (never before _S),
   then store _T at _E and advance _E by one. */
#define TRIM_WHITESPACE(_S,_E,_T) do { while ((_E) > (_S) && IS_SPACE((_E)[-1])) (_E)--;\
                                       *(_E)++ = (_T); } while (0)
|
||||
|
@ -71,19 +72,38 @@ nsMsgHeaderParser::nsMsgHeaderParser()
|
|||
{
|
||||
/* the following macro is used to initialize the ref counting data */
|
||||
NS_INIT_REFCNT();
|
||||
m_USAsciiToUtf8CharsetConverter = nsnull;
|
||||
|
||||
}
|
||||
|
||||
// Destructor: frees the cached us-ascii -> utf-8 charset converter.
// Deleting a null pointer is a no-op, so no guard is needed when the
// converter was never created.
nsMsgHeaderParser::~nsMsgHeaderParser()
{
  delete m_USAsciiToUtf8CharsetConverter;
}
|
||||
|
||||
/* the following macros actually implement addref, release and query interface for our component. */
|
||||
NS_IMPL_ADDREF(nsMsgHeaderParser)
|
||||
NS_IMPL_RELEASE(nsMsgHeaderParser)
|
||||
NS_IMPL_QUERY_INTERFACE(nsMsgHeaderParser, nsIMsgHeaderParser::GetIID()); /* we need to pass in the interface ID of this interface */
|
||||
|
||||
// Returns the parser's cached us-ascii -> utf-8 converter, creating and
// initializing it on the first call.
//
// Returns the cached converter, or null when the converter could not be
// allocated. The object stays owned by the parser; callers must not delete it.
MimeCharsetConverterClass *nsMsgHeaderParser::GetUSAsciiToUtf8CharsetConverter()
{
  // Already created on an earlier call: reuse it.
  if (m_USAsciiToUtf8CharsetConverter)
    return m_USAsciiToUtf8CharsetConverter;

  m_USAsciiToUtf8CharsetConverter = new MimeCharsetConverterClass;
  if (m_USAsciiToUtf8CharsetConverter)
  {
    // NOTE(review): the Initialize() status is not acted on, same as before
    // (it used to be stored in an unused, mistyped nsresult local). Confirm
    // that Convert() copes with a converter whose Initialize() failed.
    (void) m_USAsciiToUtf8CharsetConverter->Initialize("us-ascii", "utf-8", PR_FALSE);
  }
  return m_USAsciiToUtf8CharsetConverter;
}
|
||||
|
||||
nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const char *line, char **names, char **addresses, PRUint32 *numAddresses)
|
||||
{
|
||||
char *utf8Str, *outStrings;
|
||||
MimeCharsetConverterClass *converter = nsnull;
|
||||
nsresult rv;
|
||||
|
||||
if (nsnull == line || MIME_ConvertString(CHARSET(charset), "UTF-8", line, &utf8Str) != 0) {
|
||||
utf8Str = nsnull;
|
||||
|
@ -102,8 +122,19 @@ nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const cha
|
|||
s += len;
|
||||
}
|
||||
// convert array of strings
|
||||
if (MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *names,
|
||||
len_all, &outStrings, &outStrLen, NULL) == 0) {
|
||||
if (!charset)
|
||||
{
|
||||
converter = GetUSAsciiToUtf8CharsetConverter();
|
||||
if (converter)
|
||||
rv = converter->Convert(*names, len_all, &outStrings, &outStrLen, nsnull);
|
||||
}
|
||||
if (!converter)
|
||||
{
|
||||
rv = MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *names,
|
||||
len_all, &outStrings, &outStrLen, NULL) ;
|
||||
}
|
||||
if (NS_SUCCEEDED(rv))
|
||||
{
|
||||
PR_Free(*names);
|
||||
*names = outStrings;
|
||||
}
|
||||
|
@ -117,8 +148,21 @@ nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const cha
|
|||
s += len;
|
||||
}
|
||||
// convert array of strings
|
||||
if (MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *addresses,
|
||||
len_all, &outStrings, &outStrLen, NULL) == 0) {
|
||||
if (!charset)
|
||||
{
|
||||
converter = GetUSAsciiToUtf8CharsetConverter();
|
||||
if (converter)
|
||||
rv = converter->Convert(*addresses,
|
||||
len_all, &outStrings, &outStrLen, nsnull);
|
||||
}
|
||||
// if non null charset, or couldn't get a converter, use MIME_ function.
|
||||
if (!converter)
|
||||
{
|
||||
rv = MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *addresses,
|
||||
len_all, &outStrings, &outStrLen, NULL);
|
||||
}
|
||||
if (NS_SUCCEEDED(rv))
|
||||
{
|
||||
PR_Free(*addresses);
|
||||
*addresses = outStrings;
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include "msgCore.h"
|
||||
#include "nsIMsgHeaderParser.h" /* include the interface we are going to support */
|
||||
#include "comi18n.h"
|
||||
|
||||
/*
|
||||
* RFC-822 parser
|
||||
|
@ -116,7 +117,10 @@
|
|||
names to users. e.g. summary file, address book
|
||||
*/
|
||||
NS_IMETHOD UnquotePhraseOrAddr (const char *charset, const char *line, char** lineout);
|
||||
|
||||
|
||||
MimeCharsetConverterClass *GetUSAsciiToUtf8CharsetConverter();
|
||||
protected:
|
||||
MimeCharsetConverterClass *m_USAsciiToUtf8CharsetConverter;
|
||||
private:
|
||||
};
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче