From 1e4c9e222d453597c99305d2d5a8fde3250bcd7e Mon Sep 17 00:00:00 2001 From: "rhp%netscape.com" Date: Tue, 11 Jan 2000 01:56:25 +0000 Subject: [PATCH] Fixes for emoticon recognition - Bug #21203 - r: rhp (externally contributed) --- .../converters/mozTXTToHTMLConv.cpp | 186 ++++++++++-------- .../streamconv/converters/mozTXTToHTMLConv.h | 151 +++++++------- .../streamconv/public/mozITXTToHTMLConv.idl | 23 +-- 3 files changed, 179 insertions(+), 181 deletions(-) diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp index 627b90f71b5..0e7510399d8 100644 --- a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp @@ -1,4 +1,4 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 - +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The "License" shall be the Mozilla Public License Version 1.1, except * Sections 6.2 and 11, but with the addition of the below defined Section 14. 
@@ -37,6 +37,18 @@ #include "nsIIOService.h" #include "nsIServiceManager.h" +static nsAutoString +Right(const nsAutoString& text, PRUint32 start) +{ + MOZ_TIMER_START(mRightTimer); + + nsAutoString result; + text.Right(result, text.Length() - start); + + MOZ_TIMER_STOP(mRightTimer); + return result; +} + nsAutoString mozTXTToHTMLConv::EscapeChar(const PRUnichar ch) { @@ -186,16 +198,21 @@ mozTXTToHTMLConv::FindURLStart(const nsAutoString& text, const PRUint32 pos, PRInt32 i = pos + 1; for (; i >= 0 && text[PRUint32(i)] != '>' && text[PRUint32(i)] != '<' - && text[PRUint32(i)] != '"' && text[PRUint32(i)] != '\\' - && text[PRUint32(i)] != '`' && text[PRUint32(i)] != '}' - && text[PRUint32(i)] != ']' && text[PRUint32(i)] != ')' - && text[PRUint32(i)] != '|' + && text[PRUint32(i)] != '"' && text[PRUint32(i)] != '\'' + && text[PRUint32(i)] != '`' && text[PRUint32(i)] != ',' + && text[PRUint32(i)] != '{' && text[PRUint32(i)] != '[' + && text[PRUint32(i)] != '(' && text[PRUint32(i)] != '|' + && text[PRUint32(i)] != '\\' && !nsString::IsSpace(text[PRUint32(i)]) ; i--) ; - if (PRUint32(++i) != pos) + if + ( + nsString::IsAlpha(text[PRUint32(++i)]) || + nsString::IsDigit(text[PRUint32(i)]) + ) { - start = i; + start = PRUint32(i); return PR_TRUE; } else @@ -238,9 +255,9 @@ mozTXTToHTMLConv::FindURLEnd(const nsAutoString& text, const PRUint32 pos, for (; PRInt32(i) < text.Length() && text[i] != '>' && text[i] != '<' && text[i] != '"' && text[i] != '\'' - && text[i] != '`' && text[i] != '}' - && text[i] != ']' && text[i] != ')' - && text[i] != '|' + && text[i] != '`' && text[i] != ',' + && text[i] != '}' && text[i] != ']' + && text[i] != ')' && text[i] != '|' && !nsString::IsSpace(text[i]) ; i++) ; @@ -303,7 +320,7 @@ mozTXTToHTMLConv::CalculateURLBoundaries(const nsAutoString& text, nsAutoString temp; text.Mid(temp, descstart, pos - descstart); replaceBefore = ScanTXT(temp, ~kURLs /*prevents loop*/ - & whathasbeendone).Length(); + & whathasbeendone).Length(); return; } @@ 
-425,18 +442,6 @@ mozTXTToHTMLConv::FindURL(const nsAutoString& text, const PRUint32 pos, return state[check] == success; } -nsAutoString -mozTXTToHTMLConv::Right(const nsAutoString& text, PRUint32 start) -{ - MOZ_TIMER_START(mRightTimer); - - nsAutoString result; - text.Right(result, text.Length() - start); - - MOZ_TIMER_STOP(mRightTimer); - return result; -} - PRBool mozTXTToHTMLConv::ItMatchesDelimited(const nsAutoString& text, const char* rep, LIMTYPE before, LIMTYPE after) @@ -481,9 +486,7 @@ mozTXTToHTMLConv::ItMatchesDelimited(const nsAutoString& text, text[afterPos] == *rep ) || !(before == LT_IGNORE ? text : Right(text, 1)).Equals(rep, - PR_TRUE, repLen) // XXX bug #21071 -/* !Equals((before == LT_IGNORE ? text : Right(text, 1)), rep, - PR_TRUE, rep.Length())*/ + PR_TRUE, repLen) ) return PR_FALSE; @@ -606,75 +609,80 @@ mozTXTToHTMLConv::GlyphHit(const nsAutoString& text, PRBool col0, if ( - ((col0 ? text.First() : text[1]) == ':' || // Performance increase - (col0 ? text.First() : text[1]) == ';' ) + ( // Performance increase + (col0 ? text.First() : text[1]) == ':' || + (col0 ? 
text.First() : text[1]) == ';' + ) && ( - SmilyHit(text, col0, ":-)", "", outputHTML, glyphTextLen) || - SmilyHit(text, col0, ":)", "", outputHTML, glyphTextLen) || - SmilyHit(text, col0, ":-(", "", outputHTML, glyphTextLen) || - SmilyHit(text, col0, ":(", "", outputHTML, glyphTextLen) || - SmilyHit(text, col0, ";-)", "", outputHTML, glyphTextLen) || - SmilyHit(text, col0, ";-P", "", outputHTML, glyphTextLen) + SmilyHit(text, col0, ":-)", "\":-)\"", outputHTML, glyphTextLen) || + SmilyHit(text, col0, ":)", "\":)\"", outputHTML, glyphTextLen) || + SmilyHit(text, col0, ":-(", "\":-(\"", outputHTML, glyphTextLen) || + SmilyHit(text, col0, ":(", "\":(\"", outputHTML, glyphTextLen) || + SmilyHit(text, col0, ";-)", "\";-)\"", outputHTML, glyphTextLen) || + SmilyHit(text, col0, ";-P", "\";-P\"", outputHTML, glyphTextLen) ) ) { MOZ_TIMER_STOP(mGlyphHitTimer); return PR_TRUE; } - else if // XXX Hotfix + if // XXX Hotfix + ( + col0 // Performance increase + && ( - !col0 // Performance increase - && - ( - text[1] == ':' || - text[1] == ';' - ) - && - ( - SmilyHit(text, PR_FALSE, ":-)", "", outputHTML, glyphTextLen) || - SmilyHit(text, PR_FALSE, ":)", "", outputHTML, glyphTextLen) || - SmilyHit(text, PR_FALSE, ":-(", "", outputHTML, glyphTextLen) || - SmilyHit(text, PR_FALSE, ":(", "", outputHTML, glyphTextLen) || - SmilyHit(text, PR_FALSE, ";-)", "", outputHTML, glyphTextLen) || - SmilyHit(text, PR_FALSE, ";-P", "", outputHTML, glyphTextLen) - ) + text[1] == ':' || + text[1] == ';' + ) + && + ( + SmilyHit(text, PR_FALSE, ":-)", "\":-)\"", outputHTML, glyphTextLen) || + SmilyHit(text, PR_FALSE, ":)", "\":)\"", outputHTML, glyphTextLen) || + SmilyHit(text, PR_FALSE, ":-(", "\":-(\"", outputHTML, glyphTextLen) || + SmilyHit(text, PR_FALSE, ":(", "\":(\"", outputHTML, glyphTextLen) || + SmilyHit(text, PR_FALSE, ";-)", "\";-)\"", outputHTML, glyphTextLen) || + SmilyHit(text, PR_FALSE, ";-P", "\";-P\"", outputHTML, glyphTextLen) + ) ) { MOZ_TIMER_STOP(mGlyphHitTimer); return
PR_TRUE; } - else if (ItMatchesDelimited(text, "(c)", LT_IGNORE, LT_DELIMITER)) - // Note: ItMatchesDelimited compares case-insensitive + if (text.First() == '(') { - outputHTML = "©"; - glyphTextLen = 3; - MOZ_TIMER_STOP(mGlyphHitTimer); - return PR_TRUE; + if (ItMatchesDelimited(text, "(c)", LT_IGNORE, LT_DELIMITER)) + // Note: ItMatchesDelimited compares case-insensitive + { + outputHTML = "©"; + glyphTextLen = 3; + MOZ_TIMER_STOP(mGlyphHitTimer); + return PR_TRUE; + } + if (ItMatchesDelimited(text, "(r)", LT_IGNORE, LT_DELIMITER)) + // see above + { + outputHTML = "®"; + glyphTextLen = 3; + MOZ_TIMER_STOP(mGlyphHitTimer); + return PR_TRUE; + } } - else if (ItMatchesDelimited(text, "(r)", LT_IGNORE, LT_DELIMITER)) - // see above - { - outputHTML = "®"; - glyphTextLen = 3; - MOZ_TIMER_STOP(mGlyphHitTimer); - return PR_TRUE; - } - else if (ItMatchesDelimited(text, " +/-", LT_IGNORE, LT_IGNORE)) + if (ItMatchesDelimited(text, " +/-", LT_IGNORE, LT_IGNORE)) { outputHTML = " ±"; glyphTextLen = 4; MOZ_TIMER_STOP(mGlyphHitTimer); return PR_TRUE; } - else if (col0 && ItMatchesDelimited(text, "+/-", LT_IGNORE, LT_IGNORE)) + if (col0 && ItMatchesDelimited(text, "+/-", LT_IGNORE, LT_IGNORE)) { outputHTML = "±"; glyphTextLen = 3; MOZ_TIMER_STOP(mGlyphHitTimer); return PR_TRUE; } - else if // x^2 -> sup + if // x^2 -> sup ( text[1] == '^' // Performance increase && @@ -849,24 +857,36 @@ printf(text.ToNewCString()); switch (text[i]) // Performance increase { case '*': - case '_': - case '/': - case '|': - if - ( - StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0, + if (StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0, "*", "strong", "class=txt_star", - HTMLnsStr, structPhrase_strong) || - StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0, + HTMLnsStr, structPhrase_strong)) + { + result += HTMLnsStr; + i++; + continue; + } + case '_': + if (StructPhraseHit(i == 0 ? 
text : Right(text, i - 1), i == 0, "_", "em" /* is deprecated */, "class=txt_underscore", - HTMLnsStr, structPhrase_underline) || - StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0, + HTMLnsStr, structPhrase_underline)) + { + result += HTMLnsStr; + i++; + continue; + } + case '/': + if (StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0, "/", "em", "class=txt_slash", - HTMLnsStr, structPhrase_italic) || - StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0, + HTMLnsStr, structPhrase_italic)) + { + result += HTMLnsStr; + i++; + continue; + } + case '|': + if (StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0, "|", "code", "class=txt_verticalline", - HTMLnsStr, structPhrase_code) - ) + HTMLnsStr, structPhrase_code)) { result += HTMLnsStr; i++; @@ -1047,7 +1067,7 @@ mozTXTToHTMLConv::ScanTXT(const PRUnichar *text, PRUint32 whattodo, if (!_retval || !text) return NS_ERROR_NULL_POINTER; *_retval = ScanTXT(text, whattodo).ToNewUnicode(); - return _retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; + return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; } NS_IMETHODIMP @@ -1057,7 +1077,7 @@ mozTXTToHTMLConv::ScanHTML(const PRUnichar *text, PRUint32 whattodo, if (!_retval || !text) return NS_ERROR_NULL_POINTER; *_retval = ScanHTML(text, whattodo).ToNewUnicode(); - return _retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; + return *_retval ? 
NS_OK : NS_ERROR_OUT_OF_MEMORY; } diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.h b/netwerk/streamconv/converters/mozTXTToHTMLConv.h index e7f98581625..5c2896c9562 100644 --- a/netwerk/streamconv/converters/mozTXTToHTMLConv.h +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.h @@ -49,7 +49,10 @@ static NS_DEFINE_CID(kTXTToHTMLConvCID, MOZITXTTOHTMLCONV_CID); class mozTXTToHTMLConv : public mozITXTToHTMLConv { + +////////////////////////////////////////////////////////// public: +////////////////////////////////////////////////////////// mozTXTToHTMLConv(); virtual ~mozTXTToHTMLConv(); @@ -84,16 +87,29 @@ public: MOZ_TIMER_DECLARE(mRightTimer) MOZ_TIMER_DECLARE(mTotalMimeTime) -/////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////// protected: -/////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// /** - @param text (in): the source string - @param start (in): offset of text specifying the start of the new object - @return a new (local) object containing the substring -*/ - nsAutoString Right(const nsAutoString& text, PRUint32 start); + Completes + It does no check, if the resulting URL is valid. + @param text (in): abbreviated URL + @param pos (in): position of "@" (case 1) or first "." (case 2 and 3) + @return Completed URL at success and empty string at failure + */ + nsAutoString CompleteAbbreviatedURL(const nsAutoString& text, + const PRUint32 pos); + + +////////////////////////////////////////////////////////// +private: +////////////////////////////////////////////////////////// enum LIMTYPE { @@ -145,20 +161,6 @@ protected: */ nsAutoString UnescapeStr(const nsAutoString& aString); -/** - Completes - It does no check, if the resulting URL is valid. - @param text (in): abbreviated URL - @param pos (in): position of "@" (case 1) or first "." 
(case 2 and 3) - @return Completed URL at success and empty string at failure - */ - nsAutoString CompleteAbbreviatedURL(const nsAutoString& text, - const PRUint32 pos); - /** Note: I use different strategies to pass context between the functions (full text and pos vs. cutted text and col0, glphyTextLen vs. @@ -185,60 +187,6 @@ protected: const PRUint32 whathasbeendone, nsAutoString& outputHTML, PRInt32& replaceBefore, PRInt32& replaceAfter); -/** - @param text (in): line of text possibly with tagTXT.

- if col0 is true, - starting with tagTXT
- else - starting one char before tagTXT - @param col0 (in): tagTXT is on the beginning of the line (or paragraph). - open must be 0 then. - @param tagTXT (in): Tag in plaintext to search for, e.g. "*" - @param tagHTML (in): HTML-Tag to replace tagTXT with, - without "<" and ">", e.g. "strong" - @param attributeHTML (in): HTML-attribute to add to opening tagHTML, - e.g. "class=txt_star" - @param outputHTML (out): string to insert in output stream - @param open (in/out): Number of currently open tags of type tagHTML - @return Conversion succeeded -*/ - PRBool StructPhraseHit(const nsAutoString& text, PRBool col0, - const char* tagTXT, - const char* tagHTML, const char* attributeHTML, - nsAutoString& outputHTML, PRUint32& openTags); - -/** - @param text (in), col0 (in): see GlyphHit - @param tagTXT (in): Smily, see also StructPhraseHit - @param tagHTML (in): see StructPhraseHit - @param outputHTML (out), glyphTextLen (out): see GlyphHit -*/ - PRBool SmilyHit(const nsAutoString& text, PRBool col0, - const char* tagTXT, const char* tagHTML, - nsAutoString& outputHTML, PRInt32& glyphTextLen); - -/** - Checks, if we can replace some chars at the start of line with prettier HTML - code.

- If success is reported, replace the first glyphTextLen chars with outputHTML - - @param text (in): line of text possibly with Glyph.

- If col0 is true, - starting with Glyph
- else - starting one char before Glyph - @param col0 (in): text starts at the beginning of the line (or paragraph) - @param outputHTML (out): see StructPhraseHit - @param glyphTextLen (out): Length of original text to replace - @return see StructPhraseHit -*/ - PRBool GlyphHit(const nsAutoString& text, PRBool col0, - nsAutoString& outputHTML, PRInt32& glyphTextLen); - -////////////////////////////////////////////////////////// -private: -////////////////////////////////////////////////////////// - enum modetype { unknown, RFC1738, /* Check, if RFC1738, APPENDIX compliant, @@ -249,7 +197,7 @@ private: Also allow email addresses without scheme, e.g. "" */ freetext, /* assume heading scheme - with "[a-zA-Z][a-zA-Z0-9+\-.]*:" like "news:" + with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:" (see RFC2396, Section 3.1). Certain characters (see code) or any whitespace (including linebreaks) end the URL. @@ -306,6 +254,57 @@ private: PRBool CheckURLAndCreateHTML( const nsAutoString& txtURL, const nsAutoString& desc, nsAutoString& outputHTML); + +/** + @param text (in): line of text possibly with tagTXT.

+ if col0 is true, + starting with tagTXT
+ else + starting one char before tagTXT + @param col0 (in): tagTXT is on the beginning of the line (or paragraph). + open must be 0 then. + @param tagTXT (in): Tag in plaintext to search for, e.g. "*" + @param tagHTML (in): HTML-Tag to replace tagTXT with, + without "<" and ">", e.g. "strong" + @param attributeHTML (in): HTML-attribute to add to opening tagHTML, + e.g. "class=txt_star" + @param outputHTML (out): string to insert in output stream + @param open (in/out): Number of currently open tags of type tagHTML + @return Conversion succeeded +*/ + PRBool StructPhraseHit(const nsAutoString& text, PRBool col0, + const char* tagTXT, + const char* tagHTML, const char* attributeHTML, + nsAutoString& outputHTML, PRUint32& openTags); + +/** + @param text (in), col0 (in): see GlyphHit + @param tagTXT (in): Smily, see also StructPhraseHit + @param tagHTML (in): see StructPhraseHit + @param outputHTML (out), glyphTextLen (out): see GlyphHit +*/ + PRBool SmilyHit(const nsAutoString& text, PRBool col0, + const char* tagTXT, const char* tagHTML, + nsAutoString& outputHTML, PRInt32& glyphTextLen); + +/** + Checks, if we can replace some chars at the start of line with prettier HTML + code.

+ If success is reported, replace the first glyphTextLen chars with outputHTML + + @param text (in): line of text possibly with Glyph.

+ If col0 is true, + starting with Glyph
+ else + starting one char before Glyph + @param col0 (in): text starts at the beginning of the line (or paragraph) + @param outputHTML (out): see StructPhraseHit + @param glyphTextLen (out): Length of original text to replace + @return see StructPhraseHit +*/ + PRBool GlyphHit(const nsAutoString& text, PRBool col0, + nsAutoString& outputHTML, PRInt32& glyphTextLen); + }; // It's said, that Win32 and Mac don't like static const members diff --git a/netwerk/streamconv/public/mozITXTToHTMLConv.idl b/netwerk/streamconv/public/mozITXTToHTMLConv.idl index 90c5f5a1644..1fc1485aa87 100644 --- a/netwerk/streamconv/public/mozITXTToHTMLConv.idl +++ b/netwerk/streamconv/public/mozITXTToHTMLConv.idl @@ -38,27 +38,6 @@

Wrapper class for various parsing routines, that convert plain text to HTML. They try to recognize cites, URLs, plain text formattting like *bold* etc. -

- Use the nsString versions of these functions in the mozTXTToHTMLConv - implementation when calling from C++. -

- There're a lot of protected virtual functions in the mozTXTToHTMLConv - implementation, that might be helpful, too. Add wrappers for them to this or - any other interface, if you need them. It's easier to add a function than to - remove it, so I don't add them now. Here's the list: - - nsAutoString EscapeChar(const PRUnichar ch) - nsAutoString EscapeStr(const nsAutoString& aString) - nsAutoString UnescapeStr(const nsAutoString& aString) - nsAutoString CompleteAbbreviatedURL(const nsAutoString& text, PRUint32 pos) - PRBool FindURL(const nsAutoString& text, PRUint32 pos, - PRUint32 whathasbeendone, nsAutoString& outputHTML, - PRInt32& replaceBefore, PRInt32& replaceAfter) - nsAutoString Right(const nsAutoString& text, PRUint32 start) - PRBool ItMatchesDelimited(const nsAutoString& text, const nsAutoString& rep, - LIMTYPE before, LIMTYPE after) - PRUint32 NumberOfMatches(const nsAutoString& text, const nsAutoString& rep, - LIMTYPE before, LIMTYPE after) */ #include "nsIStreamConverter.idl" @@ -89,7 +68,7 @@ interface mozITXTToHTMLConv : nsIStreamConverter { /* Adds additional formatting to user edited text, that the user was too lazy - "unknowledged" (DELETEME: is that a word?) to make. + or "unknowledged" (DELETEME: is that a word?) to make.

Note: Don't use kGlyphSubstitution with this function. This option generates tags, that are unuseable for UAs other than Mozilla. This would