some performance improvements for header parsing

This commit is contained in:
bienvenu%netscape.com 1999-09-21 00:06:25 +00:00
Родитель d0b8911644
Коммит 9adfdc2ef9
4 изменённых файлов: 109 добавлений и 49 удалений

Просмотреть файл

@ -568,13 +568,13 @@ static PRBool intlmime_only_ascii_str(const char *s)
static unsigned char * utf8_nextchar(unsigned char *str)
{
int len = PL_strlen((char *) str);
if (*str < 128) {
return (str+1);
}
int len = PL_strlen((char *) str);
// RFC 2279 defines more than 3 bytes sequences (0xF0, 0xF8, 0xFC),
// but I think we won't encounter those cases as long as we're supporting UCS-2 and no surrogate.
else if ((len >= 3) && (*str >= 0xE0)) {
if ((len >= 3) && (*str >= 0xE0)) {
return (str+3);
}
else if ((len >= 2) && (*str >= 0xC0)) {
@ -1268,41 +1268,6 @@ static PRInt32 INTL_ConvertFromUnicode(const char* to_charset, const void* uniBu
}
////////////////////////////////////////////////////////////////////////////////
// Charset converter used by the MIME code. It holds a decoder (to Unicode)
// and an encoder (from Unicode) for a from/to charset pair, plus optional
// charset auto-detection state for the input.
class MimeCharsetConverterClass {
public:
MimeCharsetConverterClass();
virtual ~MimeCharsetConverterClass();
// Initialize converters for charsets, fails if converter not available.
//   from_charset      - name of the input charset
//   to_charset        - name of the output charset
//   autoDetect        - request charset auto-detection on the input
//   maxNumCharsDetect - limit for auto-detection, -1 for no limit
//                       (presumably stored in mMaxNumCharsDetect)
// NOTE(review): callers appear to treat a return value of -1 as failure;
// confirm against the implementation.
PRInt32 Initialize(const char* from_charset, const char* to_charset,
const PRBool autoDetect=PR_FALSE, const PRInt32 maxNumCharsDetect=-1);
// Converts input buffer or duplicates input if converters not available (and returns 0).
// Also duplicates input if conversion is not needed.
// A C string is generated for the converted string and handed back through
// outBuffer / outLength.
// NOTE(review): callers in this change pass NULL for numUnConverted, so it
// is presumably optional; confirm against the implementation.
PRInt32 Convert(const char* inBuffer, const PRInt32 inLength,
char** outBuffer, PRInt32* outLength,
PRInt32* numUnConverted);
protected:
// Returns the detected-charset decoder when auto-detection is enabled and a
// detected decoder exists; otherwise returns the regular decoder.
nsIUnicodeDecoder * GetUnicodeDecoder() {return (mAutoDetect && NULL != mDecoderDetected) ? mDecoderDetected : mDecoder;}
// Returns the encoder (convert from unicode).
nsIUnicodeEncoder * GetUnicodeEncoder() {return mEncoder;}
// Tells whether converting between the two named charsets is needed at all.
PRBool NeedCharsetConversion(const nsString& from_charset, const nsString& to_charset);
private:
nsIUnicodeDecoder *mDecoder; // decoder (convert to unicode)
nsIUnicodeEncoder *mEncoder; // encoder (convert from unicode)
nsIUnicodeDecoder *mDecoderDetected; // decoder for the detected charset (used after auto detection succeeds)
PRInt32 mMaxNumCharsDetect; // maximum number of characters in bytes to abort auto detection
// (-1 for no limit)
PRInt32 mNumChars; // accumulated number of characters converted in bytes
PRBool mAutoDetect; // true if auto detection is to be applied
nsString mInputCharset; // input charset for auto detection hint as well as need conversion check
nsString mOutputCharset; // output charset for need conversion check
nsIStringCharsetDetector *mDetector; // charset detector
};
MimeCharsetConverterClass::MimeCharsetConverterClass()
{
mDecoder = NULL;
@ -1583,16 +1548,17 @@ PRInt32 MIME_ConvertCharset(const PRBool autoDetection, const char* from_charset
const char* inBuffer, const PRInt32 inLength, char** outBuffer, PRInt32* outLength,
PRInt32* numUnConverted)
{
char srcCharset[kMAX_CSNAME+1], dstCharset[kMAX_CSNAME+1];
// char srcCharset[kMAX_CSNAME+1], dstCharset[kMAX_CSNAME+1];
MimeCharsetConverterClass aMimeCharsetConverterClass;
PRInt32 res;
srcCharset[0] = '\0';
dstCharset[0] = '\0';
PL_strcpy(srcCharset, PL_strcasecmp(from_charset, "us-ascii") ? (char *) from_charset : "iso-8859-1");
PL_strcpy(dstCharset, PL_strcasecmp(to_charset, "us-ascii") ? (char *) to_charset : "iso-8859-1");
// commenting out per Naoki's instructions.
// srcCharset[0] = '\0';
// dstCharset[0] = '\0';
// PL_strcpy(srcCharset, PL_strcasecmp(from_charset, "us-ascii") ? (char *) from_charset : "iso-8859-1");
// PL_strcpy(dstCharset, PL_strcasecmp(to_charset, "us-ascii") ? (char *) to_charset : "iso-8859-1");
res = aMimeCharsetConverterClass.Initialize(srcCharset, dstCharset, autoDetection, -1);
// Pass the caller's charset names straight through: the source is
// from_charset and the destination is to_charset. Passing from_charset for
// both would set the converter up for the wrong target charset.
res = aMimeCharsetConverterClass.Initialize(from_charset, to_charset, autoDetection, -1);
if (res != -1) {
res = aMimeCharsetConverterClass.Convert(inBuffer, inLength, outBuffer, outLength, NULL);

Просмотреть файл

@ -15,7 +15,10 @@
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _COMI18N_LOADED_H_
#define _COMI18N_LOADED_H_
#include "msgCore.h"
#ifndef kMIME_ENCODED_WORD_SIZE
#define kMIME_ENCODED_WORD_SIZE 75
@ -25,7 +28,47 @@
#define kMAX_CSNAME 64
#endif
class nsIUnicodeDecoder;
class nsIUnicodeEncoder;
class nsIStringCharsetDetector;
// Charset converter used by the MIME code. It holds a decoder (to Unicode)
// and an encoder (from Unicode) for a from/to charset pair, plus optional
// charset auto-detection state for the input.
class MimeCharsetConverterClass {
public:
MimeCharsetConverterClass();
virtual ~MimeCharsetConverterClass();
// Initialize converters for charsets, fails if converter not available.
//   from_charset      - name of the input charset
//   to_charset        - name of the output charset
//   autoDetect        - request charset auto-detection on the input
//   maxNumCharsDetect - limit for auto-detection, -1 for no limit
//                       (presumably stored in mMaxNumCharsDetect)
// NOTE(review): callers appear to treat a return value of -1 as failure;
// confirm against the implementation.
PRInt32 Initialize(const char* from_charset, const char* to_charset,
const PRBool autoDetect=PR_FALSE, const PRInt32 maxNumCharsDetect=-1);
// Converts input buffer or duplicates input if converters not available (and returns 0).
// Also duplicates input if conversion is not needed.
// A C string is generated for the converted string and handed back through
// outBuffer / outLength.
// NOTE(review): callers in this change pass NULL for numUnConverted, so it
// is presumably optional; confirm against the implementation.
PRInt32 Convert(const char* inBuffer, const PRInt32 inLength,
char** outBuffer, PRInt32* outLength,
PRInt32* numUnConverted);
protected:
// Returns the detected-charset decoder when auto-detection is enabled and a
// detected decoder exists; otherwise returns the regular decoder.
nsIUnicodeDecoder * GetUnicodeDecoder() {return (mAutoDetect && NULL != mDecoderDetected) ? mDecoderDetected : mDecoder;}
// Returns the encoder (convert from unicode).
nsIUnicodeEncoder * GetUnicodeEncoder() {return mEncoder;}
// Tells whether converting between the two named charsets is needed at all.
PRBool NeedCharsetConversion(const nsString& from_charset, const nsString& to_charset);
private:
nsIUnicodeDecoder *mDecoder; // decoder (convert to unicode)
nsIUnicodeEncoder *mEncoder; // encoder (convert from unicode)
nsIUnicodeDecoder *mDecoderDetected; // decoder for the detected charset (used after auto detection succeeds)
PRInt32 mMaxNumCharsDetect; // maximum number of characters in bytes to abort auto detection
// (-1 for no limit)
PRInt32 mNumChars; // accumulated number of characters converted in bytes
PRBool mAutoDetect; // true if auto detection is to be applied
nsString mInputCharset; // input charset for auto detection hint as well as need conversion check
nsString mOutputCharset; // output charset for need conversion check
nsIStringCharsetDetector *mDetector; // charset detector
};
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
@ -145,3 +188,6 @@ char *INTL_EncodeMimePartIIStr_VarLen(char *subject, PRInt16 wincsid, PRBool bUs
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif // _COMI18N_LOADED_H_

Просмотреть файл

@ -36,6 +36,7 @@
// COPY_CHAR(_D,_S): copies one character from _S to _D and advances _D.
// If _S is null or points at a terminating 0, a single 0 byte is written
// instead. Otherwise the character's byte length is taken as the distance
// from _S to NextChar_UTF8(_S). _S itself is not advanced.
#define COPY_CHAR(_D,_S) do { if (!_S || !*_S) { *_D++ = 0; }\
else { int _LEN = NextChar_UTF8((char *)_S) - _S;\
nsCRT::memcpy(_D,_S,_LEN); _D += _LEN; } } while (0)
// Disabled variant of NEXT_CHAR that steps a single byte when the current
// byte is below 128 and only calls NextChar_UTF8 otherwise.
// NOTE(review): where plain char is signed, the `< 128` test on a char is
// always true; confirm before re-enabling.
//#define NEXT_CHAR(_STR) (_STR = (* (char *) _STR < 128) ? (char *) _STR + 1 : NextChar_UTF8((char *)_STR))
// NEXT_CHAR(_STR): advances _STR to whatever NextChar_UTF8 returns for it.
#define NEXT_CHAR(_STR) (_STR = NextChar_UTF8((char *)_STR))
// TRIM_WHITESPACE(_S,_E,_T): moves _E backwards while it is still past _S and
// the preceding character satisfies IS_SPACE, then stores _T at *_E and
// advances _E past it.
#define TRIM_WHITESPACE(_S,_E,_T) do { while (_E > _S && IS_SPACE(_E[-1])) _E--;\
*_E++ = _T; } while (0)
@ -71,19 +72,38 @@ nsMsgHeaderParser::nsMsgHeaderParser()
{
/* the following macro is used to initialize the ref counting data */
NS_INIT_REFCNT();
m_USAsciiToUtf8CharsetConverter = nsnull;
}
// Destructor: releases the cached us-ascii -> utf-8 converter. The member
// starts out as nsnull in the constructor and is only allocated on demand by
// GetUSAsciiToUtf8CharsetConverter(), so it may still be null here; deleting
// a null pointer is a no-op.
nsMsgHeaderParser::~nsMsgHeaderParser()
{}
{
delete m_USAsciiToUtf8CharsetConverter;
}
/* the following macros actually implement addref, release and query interface for our component. */
NS_IMPL_ADDREF(nsMsgHeaderParser)
NS_IMPL_RELEASE(nsMsgHeaderParser)
NS_IMPL_QUERY_INTERFACE(nsMsgHeaderParser, nsIMsgHeaderParser::GetIID()); /* we need to pass in the interface ID of this interface */
// Returns the parser's cached "us-ascii" -> "utf-8" converter, allocating and
// initializing it on first use so that repeated header parsing does not set
// up a new converter for every call.
//
// Returns null only when the allocation itself yields null; callers are
// expected to fall back to MIME_ConvertCharset in that case. The parser keeps
// ownership of the returned object and deletes it in its destructor.
MimeCharsetConverterClass *nsMsgHeaderParser::GetUSAsciiToUtf8CharsetConverter()
{
  // Fast path: the converter was already created by an earlier call.
  if (m_USAsciiToUtf8CharsetConverter)
    return m_USAsciiToUtf8CharsetConverter;

  m_USAsciiToUtf8CharsetConverter = new MimeCharsetConverterClass;
  if (m_USAsciiToUtf8CharsetConverter)
  {
    // The result is intentionally discarded (Initialize returns a PRInt32,
    // not an nsresult). Per the class comments, Convert duplicates the input
    // when converters are not available, so the object is kept either way.
    (void) m_USAsciiToUtf8CharsetConverter->Initialize("us-ascii","utf-8", PR_FALSE);
  }
  return m_USAsciiToUtf8CharsetConverter;
}
nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const char *line, char **names, char **addresses, PRUint32 *numAddresses)
{
char *utf8Str, *outStrings;
MimeCharsetConverterClass *converter = nsnull;
nsresult rv;
if (nsnull == line || MIME_ConvertString(CHARSET(charset), "UTF-8", line, &utf8Str) != 0) {
utf8Str = nsnull;
@ -102,8 +122,19 @@ nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const cha
s += len;
}
// convert array of strings
if (MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *names,
len_all, &outStrings, &outStrLen, NULL) == 0) {
if (!charset)
{
converter = GetUSAsciiToUtf8CharsetConverter();
if (converter)
rv = converter->Convert(*names, len_all, &outStrings, &outStrLen, nsnull);
}
if (!converter)
{
rv = MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *names,
len_all, &outStrings, &outStrLen, NULL) ;
}
if (NS_SUCCEEDED(rv))
{
PR_Free(*names);
*names = outStrings;
}
@ -117,8 +148,21 @@ nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const cha
s += len;
}
// convert array of strings
if (MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *addresses,
len_all, &outStrings, &outStrLen, NULL) == 0) {
if (!charset)
{
converter = GetUSAsciiToUtf8CharsetConverter();
if (converter)
rv = converter->Convert(*addresses,
len_all, &outStrings, &outStrLen, nsnull);
}
// if non null charset, or couldn't get a converter, use MIME_ function.
if (!converter)
{
rv = MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *addresses,
len_all, &outStrings, &outStrLen, NULL);
}
if (NS_SUCCEEDED(rv))
{
PR_Free(*addresses);
*addresses = outStrings;
}

Просмотреть файл

@ -27,6 +27,7 @@
#include "msgCore.h"
#include "nsIMsgHeaderParser.h" /* include the interface we are going to support */
#include "comi18n.h"
/*
* RFC-822 parser
@ -116,7 +117,10 @@
names to users. e.g. summary file, address book
*/
NS_IMETHOD UnquotePhraseOrAddr (const char *charset, const char *line, char** lineout);
MimeCharsetConverterClass *GetUSAsciiToUtf8CharsetConverter();
protected:
MimeCharsetConverterClass *m_USAsciiToUtf8CharsetConverter;
private:
};