some performance improvements for header parsing

1999-09-21 00:06:25 +00:00 · 1999-09-21 00:06:25 +00:00 · 9adfdc2ef9
--- a/mailnews/mime/src/comi18n.cpp
+++ b/mailnews/mime/src/comi18n.cpp
@ -568,13 +568,13 @@ static PRBool intlmime_only_ascii_str(const char *s)

 static unsigned char * utf8_nextchar(unsigned char *str)
 {
-  int len = PL_strlen((char *) str);
  if (*str < 128) {
    return (str+1);
  }
+  int len = PL_strlen((char *) str);
  // RFC 2279 defines more than 3 bytes sequences (0xF0, 0xF8, 0xFC),
  // but I think we won't encounter those cases as long as we're supporting UCS-2 and no surrogate.
-  else if ((len >= 3) && (*str >= 0xE0)) {
+  if ((len >= 3) && (*str >= 0xE0)) {
    return (str+3);
  }
  else if ((len >= 2) && (*str >= 0xC0)) {
@ -1268,41 +1268,6 @@ static PRInt32 INTL_ConvertFromUnicode(const char* to_charset, const void* uniBu
 }
 ////////////////////////////////////////////////////////////////////////////////

-class MimeCharsetConverterClass {
-public:
-  MimeCharsetConverterClass();
-  virtual ~MimeCharsetConverterClass();
-
-  // Initialize converters for charsets, fails if converter not available.
-  // 
-  PRInt32 Initialize(const char* from_charset, const char* to_charset, 
-                     const PRBool autoDetect=PR_FALSE, const PRInt32 maxNumCharsDetect=-1);
-
-  // Converts input buffer or duplicates input if converters not available (and returns 0).
-  // Also duplicates input if convertion not needed.
-  // C string is generated for converted string.
-  PRInt32 Convert(const char* inBuffer, const PRInt32 inLength, 
-                  char** outBuffer, PRInt32* outLength,
-                  PRInt32* numUnConverted);
-
-protected:
-  nsIUnicodeDecoder * GetUnicodeDecoder() {return (mAutoDetect && NULL != mDecoderDetected) ? mDecoderDetected : mDecoder;}
-  nsIUnicodeEncoder * GetUnicodeEncoder() {return mEncoder;}
-  PRBool NeedCharsetConversion(const nsString& from_charset, const nsString& to_charset);
-
-private:
-  nsIUnicodeDecoder *mDecoder;          // decoder (convert to unicode)  
-  nsIUnicodeEncoder *mEncoder;          // encoder (convert from unicode)
-  nsIUnicodeDecoder *mDecoderDetected;  // decoder of detected charset (after when auto detection succeeded)
-  PRInt32 mMaxNumCharsDetect;           // maximum number of characters in bytes to abort auto detection 
-                                        // (-1 for no limit)
-  PRInt32 mNumChars;                    // accumulated number of characters converted in bytes
-  PRBool mAutoDetect;                   // true if apply auto detection
-  nsString mInputCharset;               // input charset for auto detection hint as well as need conversion check
-  nsString mOutputCharset;              // output charset for need conversion check
-  nsIStringCharsetDetector *mDetector;  // charset detector
-};
-
 MimeCharsetConverterClass::MimeCharsetConverterClass()
 {
  mDecoder = NULL;
@ -1583,16 +1548,17 @@ PRInt32 MIME_ConvertCharset(const PRBool autoDetection, const char* from_charset
                            const char* inBuffer, const PRInt32 inLength, char** outBuffer, PRInt32* outLength,
                            PRInt32* numUnConverted)
 {
-  char srcCharset[kMAX_CSNAME+1], dstCharset[kMAX_CSNAME+1];
+//  char srcCharset[kMAX_CSNAME+1], dstCharset[kMAX_CSNAME+1];
  MimeCharsetConverterClass aMimeCharsetConverterClass;
  PRInt32 res;

-  srcCharset[0] = '\0';
-  dstCharset[0] = '\0';
-  PL_strcpy(srcCharset, PL_strcasecmp(from_charset, "us-ascii") ? (char *) from_charset : "iso-8859-1");
-  PL_strcpy(dstCharset, PL_strcasecmp(to_charset, "us-ascii") ? (char *) to_charset : "iso-8859-1");
+  // commenting out per Naoki's instructions: charset names now go to Initialize() unmodified.
+//  srcCharset[0] = '\0';
+//  dstCharset[0] = '\0';
+//  PL_strcpy(srcCharset, PL_strcasecmp(from_charset, "us-ascii") ? (char *) from_charset : "iso-8859-1");
+//  PL_strcpy(dstCharset, PL_strcasecmp(to_charset, "us-ascii") ? (char *) to_charset : "iso-8859-1");

-  res = aMimeCharsetConverterClass.Initialize(srcCharset, dstCharset, autoDetection, -1);
+  res = aMimeCharsetConverterClass.Initialize(from_charset, to_charset, autoDetection, -1);

  if (res != -1) {
    res = aMimeCharsetConverterClass.Convert(inBuffer, inLength, outBuffer, outLength, NULL);
--- a/mailnews/mime/src/comi18n.h
+++ b/mailnews/mime/src/comi18n.h
@ -15,7 +15,10 @@
 * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
 * Reserved.
 */
+#ifndef _COMI18N_LOADED_H_
+#define _COMI18N_LOADED_H_

+#include "msgCore.h"

 #ifndef kMIME_ENCODED_WORD_SIZE
 #define kMIME_ENCODED_WORD_SIZE 75
@ -25,7 +28,47 @@
 #define kMAX_CSNAME 64
 #endif

+class nsIUnicodeDecoder;
+class nsIUnicodeEncoder;
+class nsIStringCharsetDetector;
+
+class MimeCharsetConverterClass {
+public:
+  MimeCharsetConverterClass();
+  virtual ~MimeCharsetConverterClass();
+
+  // Initialize converters for charsets, fails if converter not available.
+  // autoDetect and maxNumCharsDetect are optional (defaults: PR_FALSE and -1).
+  PRInt32 Initialize(const char* from_charset, const char* to_charset, 
+                     const PRBool autoDetect=PR_FALSE, const PRInt32 maxNumCharsDetect=-1);
+
+  // Converts input buffer or duplicates input if converters not available (and returns 0).
+  // Also duplicates input if conversion is not needed.
+  // The converted string is generated as a C string.
+  PRInt32 Convert(const char* inBuffer, const PRInt32 inLength, 
+                  char** outBuffer, PRInt32* outLength,
+                  PRInt32* numUnConverted);
+
+protected:
+  nsIUnicodeDecoder * GetUnicodeDecoder() {return (mAutoDetect && NULL != mDecoderDetected) ? mDecoderDetected : mDecoder;}
+  nsIUnicodeEncoder * GetUnicodeEncoder() {return mEncoder;}
+  PRBool NeedCharsetConversion(const nsString& from_charset, const nsString& to_charset);
+
+private:
+  nsIUnicodeDecoder *mDecoder;          // decoder (convert to unicode)
+  nsIUnicodeEncoder *mEncoder;          // encoder (convert from unicode)
+  nsIUnicodeDecoder *mDecoderDetected;  // decoder for the detected charset (used once auto detection has succeeded)
+  PRInt32 mMaxNumCharsDetect;           // maximum number of characters (in bytes) before auto detection is aborted
+                                        // (-1 for no limit)
+  PRInt32 mNumChars;                    // accumulated number of characters converted (in bytes)
+  PRBool mAutoDetect;                   // true if auto detection should be applied
+  nsString mInputCharset;               // input charset: hint for auto detection, also used by the need-conversion check
+  nsString mOutputCharset;              // output charset, used by the need-conversion check
+  nsIStringCharsetDetector *mDetector;  // charset detector
+};
+
  
+
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
@ -145,3 +188,6 @@ char *INTL_EncodeMimePartIIStr_VarLen(char *subject, PRInt16 wincsid, PRBool bUs
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
+
+#endif // _COMI18N_LOADED_H_
+
--- a/mailnews/mime/src/nsMsgHeaderParser.cpp
+++ b/mailnews/mime/src/nsMsgHeaderParser.cpp
@ -36,6 +36,7 @@
 #define COPY_CHAR(_D,_S)            do { if (!_S || !*_S) { *_D++ = 0; }\
                                         else { int _LEN = NextChar_UTF8((char *)_S) - _S;\
                                                nsCRT::memcpy(_D,_S,_LEN); _D += _LEN; } } while (0)
+//#define NEXT_CHAR(_STR)             (_STR = (* (char *) _STR < 128) ? (char *) _STR + 1 : NextChar_UTF8((char *)_STR))
 #define NEXT_CHAR(_STR)             (_STR = NextChar_UTF8((char *)_STR))
 #define TRIM_WHITESPACE(_S,_E,_T)   do { while (_E > _S && IS_SPACE(_E[-1])) _E--;\
                                         *_E++ = _T; } while (0)
@ -71,19 +72,38 @@ nsMsgHeaderParser::nsMsgHeaderParser()
 {
  /* the following macro is used to initialize the ref counting data */
  NS_INIT_REFCNT();
+  m_USAsciiToUtf8CharsetConverter = nsnull;
+
 }

 nsMsgHeaderParser::~nsMsgHeaderParser()
-{}
+{
+	delete m_USAsciiToUtf8CharsetConverter;
+}

 /* the following macros actually implement addref, release and query interface for our component. */
 NS_IMPL_ADDREF(nsMsgHeaderParser)
 NS_IMPL_RELEASE(nsMsgHeaderParser)
 NS_IMPL_QUERY_INTERFACE(nsMsgHeaderParser, nsIMsgHeaderParser::GetIID()); /* we need to pass in the interface ID of this interface */

+// Creates the us-ascii -> utf-8 converter on first use and returns it; null only if the allocation failed.
+MimeCharsetConverterClass *nsMsgHeaderParser::GetUSAsciiToUtf8CharsetConverter()
+{
+	if (!m_USAsciiToUtf8CharsetConverter)
+	{
+		m_USAsciiToUtf8CharsetConverter = new MimeCharsetConverterClass;
+		// Initialize() status (PRInt32) is not checked; Convert() duplicates its input if converters are unavailable.
+		if (m_USAsciiToUtf8CharsetConverter)
+			(void) m_USAsciiToUtf8CharsetConverter->Initialize("us-ascii","utf-8", PR_FALSE);
+	}
+	return m_USAsciiToUtf8CharsetConverter;
+}
+
 nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const char *line, char **names, char **addresses, PRUint32 *numAddresses)
 {
  char *utf8Str, *outStrings;
+  MimeCharsetConverterClass *converter = nsnull;
+  nsresult rv;

  if (nsnull == line || MIME_ConvertString(CHARSET(charset), "UTF-8", line, &utf8Str) != 0) {
    utf8Str = nsnull;
@ -102,8 +122,19 @@ nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const cha
      s += len;
    }
    // convert array of strings
-    if (MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *names, 
-                            len_all, &outStrings, &outStrLen, NULL) == 0) {
+	if (!charset)
+	{
+		converter = GetUSAsciiToUtf8CharsetConverter();
+		if (converter)
+			rv = converter->Convert(*names, len_all, &outStrings, &outStrLen, nsnull);
+	}
+	if (!converter)
+	{
+		rv = MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *names, 
+                            len_all, &outStrings, &outStrLen, NULL) ; 
+	}
+	if (NS_SUCCEEDED(rv))
+	{
      PR_Free(*names);
      *names = outStrings;
    }
@ -117,8 +148,21 @@ nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const cha
      s += len;
    }
    // convert array of strings
-    if (MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *addresses, 
-                            len_all, &outStrings, &outStrLen, NULL) == 0) {
+	if (!charset)
+	{
+		converter = GetUSAsciiToUtf8CharsetConverter();
+		if (converter)
+			rv = converter->Convert(*addresses, 
+                            len_all, &outStrings, &outStrLen, nsnull);
+	}
+	// if non null charset, or couldn't get a converter, use MIME_ function.
+	if (!converter)
+	{
+		rv = MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *addresses, 
+                            len_all, &outStrings, &outStrLen, NULL);
+	}
+	if (NS_SUCCEEDED(rv))
+	{
      PR_Free(*addresses);
      *addresses = outStrings;
    }
--- a/mailnews/mime/src/nsMsgHeaderParser.h
+++ b/mailnews/mime/src/nsMsgHeaderParser.h
@ -27,6 +27,7 @@

 #include "msgCore.h"
 #include "nsIMsgHeaderParser.h" /* include the interface we are going to support */
+#include "comi18n.h"

 /* 
  * RFC-822 parser
@ -116,7 +117,10 @@
 	   names to users. e.g. summary file, address book
 	 */
 	NS_IMETHOD UnquotePhraseOrAddr (const char *charset, const char *line, char** lineout);
-
+	
+	MimeCharsetConverterClass *GetUSAsciiToUtf8CharsetConverter();
+	protected:
+		MimeCharsetConverterClass *m_USAsciiToUtf8CharsetConverter;
 	private:
 };