Description: Currently only functions to enhance plain text with HTML tags. See mozITXTToHTMLConv. Stream conversion is defunct.
#ifndef _mozTXTToHTMLConv_h__
#define _mozTXTToHTMLConv_h__
#include "mozITXTToHTMLConv.h"
#include "nsIIOService.h"
#include "nsString.h"
#include "nsTimer.h"
#include "nsCOMPtr.h"
// Class that publicly derives from mozITXTToHTMLConv and declares the
// plain-text-to-HTML helper functions that follow.
// NOTE(review): the opening brace and access specifiers of the class body are
// not visible in this excerpt.
class mozTXTToHTMLConv : public mozITXTToHTMLConv
// Virtual destructor.
virtual ~mozTXTToHTMLConv();
// XXX Is this really needed? This isn't an interface.
see mozITXTToHTMLConv::ScanTXT
// Takes the input as a raw, const PRUnichar buffer (aInString) plus an explicit
// length (aInStringLength); the result is delivered through aOutString.
// NOTE(review): the meaning of whattodo is defined by
// mozITXTToHTMLConv::ScanTXT -- confirm there.
void ScanTXT(const PRUnichar * aInString, PRInt32 aInStringLength, PRUint32 whattodo, nsString& aOutString);
See mozITXTToHTMLConv::ScanHTML. Note that aInString may be modified by this call.
// aInString is taken by non-const reference because the scan may modify it;
// the result is delivered through aOutString.
// NOTE(review): the meaning of whattodo is defined by
// mozITXTToHTMLConv::ScanHTML -- confirm there.
void ScanHTML(nsString& aInString, PRUint32 whattodo, nsString &aOutString);
see mozITXTToHTMLConv::CiteLevelTXT
// Returns a PRInt32 -- presumably the citation (quote) level of `line`; confirm
// against mozITXTToHTMLConv::CiteLevelTXT.
// logLineStart is a non-const reference, so the call can update it.
PRInt32 CiteLevelTXT(const PRUnichar * line,PRUint32& logLineStart);
// Timing!
// nsIIOService handle kept as a member (held in an nsCOMPtr) so it does not
// have to be fetched again for each use.
nsCOMPtr<nsIIOService> mIOService; // for performance reasons, cache the netwerk service...
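<li>Case 1: mailto: "user@example.org" -> "mailto:user@example.org"
<li>Case 2: http: "www.example.org" -> "http://www.example.org"
<li>Case 3: ftp: "ftp.example.org" -> "ftp://ftp.example.org"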
It does not check whether the resulting URL is valid.
@param text (in): abbreviated URL
@param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
@return Completed URL at success and empty string at failure
// Completes the abbreviated URL held in aInString (aInLength long).
// pos is the position of the "@" or of the first "." in that text.
// The function itself returns void: the completed URL is delivered through
// aOutString, which is empty on failure.
void CompleteAbbreviatedURL(const PRUnichar * aInString, PRInt32 aInLength,
const PRUint32 pos, nsString& aOutString);
// Kinds of limitation that can be required next to a searched string `rep`.
// Presumably these are the enumerators of LIMTYPE, the type of the
// `before` / `after` arguments of ItMatchesDelimited and NumberOfMatches.
// NOTE(review): the enclosing `enum` line is not visible in this excerpt.
LT_IGNORE, // limitation not checked
LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok.
LT_ALPHA, // alpha char
@param text (in): the string to search through.<p>
If before = IGNORE,<br>
rep is compared starting at the first char of text (text[0]),<br>
else starting at the second char of text (text[1]).
Chars after "after"-delimiter are ignored.
@param rep (in): the string to look for
@param aRepLen (in): the number of bytes in the string to look for
@param before (in): limitation before rep
@param after (in): limitation after rep
@return true, if rep is found and limitation spec is met or rep is empty
// Tests whether rep (aRepLen long) is found in aInString (aInLength long) with
// the `before` and `after` limitations satisfied.
// Returns true on a match that meets the limitations, and also when rep is empty.
PRBool ItMatchesDelimited(const PRUnichar * aInString, PRInt32 aInLength,
const PRUnichar * rep, PRInt32 aRepLen, LIMTYPE before, LIMTYPE after);
@param see ItMatchesDelimited
@return Number of ItMatchesDelimited matches in text
// Counts the ItMatchesDelimited matches of rep within aInString
// (aInStringLength long). rep, aRepLen, before and after have the same meaning
// as for ItMatchesDelimited.
PRUint32 NumberOfMatches(const PRUnichar * aInString, PRInt32 aInStringLength,
const PRUnichar* rep, PRInt32 aRepLen, LIMTYPE before, LIMTYPE after);
Currently only changes "<", ">" and "&". All others stay as they are.<p>
"Char" in function name to avoid side effects with nsString(ch)
@param ch (in)
@param aStringToAppendto (out) - the string to append the escaped
string to.
// Appends the escaped form of the single character ch to aStringToAppendto.
// Only "<", ">" and "&" are changed; every other character stays as it is.
void EscapeChar(const PRUnichar ch, nsString& aStringToAppendto);
See EscapeChar. Escapes the string in place.
// In-place string variant of EscapeChar: aInString is both the input and the
// escaped output.
void EscapeStr(nsString& aInString);
Currently only reverts "<", ">" and "&". All others stay as they are.<p>
@param aInString (in) HTML string
@param aStartPos (in) start index into the buffer
@param aLength (in) length of the buffer
@param aOutString (out) unescaped buffer
// Reverts the escaping of "<", ">" and "&" in an HTML string.
// aInString is const and is read starting at index aStartPos; aLength gives the
// buffer length. The unescaped text is delivered through aOutString.
void UnescapeStr(const PRUnichar * aInString, PRInt32 aStartPos, PRInt32 aLength, nsString& aOutString);
<em>Note</em>: I use different strategies to pass context between the
functions (full text and pos vs. cut text and col0, glyphTextLen vs.
replaceBefore/-After). It makes some sense, but is hard to understand
(maintain) :-(.
<p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
in text should be replaced by outputHTML.</p>
<p><em>Note:</em> This function should be able to process a URL on multiple
lines, but currently, ScanForURLs is called for every line, so it can't.</p>
@param text (in): includes possibly a URL
@param pos (in): position in text, where either ":", "." or "@" are found
@param whathasbeendone (in): What the calling ScanTXT did/has to do with the
(not-linkified) text, i.e. usually the "whattodo" parameter.
(Needed to calculate replaceBefore.) NOT what will be done with
the content of the link.
@param outputHTML (out): URL with HTML-a tag
@param replaceBefore (out): Number of chars of URL before pos
@param replaceAfter (out): Number of chars of URL after pos
@return URL found
// Looks for a URL around position pos of aInString (aInLength long); pos is
// where a ":", "." or "@" was found.
// Inputs: aInString, aInLength, pos, whathasbeendone.
// Outputs (non-const references): outputHTML (URL with HTML <a> tag),
// replaceBefore and replaceAfter (number of URL chars before / after pos).
// Returns whether a URL was found.
PRBool FindURL(const PRUnichar * aInString, PRInt32 aInLength, const PRUint32 pos,
const PRUint32 whathasbeendone,
nsString& outputHTML, PRInt32& replaceBefore, PRInt32& replaceAfter);
// URL notations that can be recognised. Values of this type are passed as the
// `check` / `mode` arguments of FindURLStart, FindURLEnd,
// CalculateURLBoundaries and CheckURLAndCreateHTML.
// NOTE(review): the example.org strings below are illustrative placeholders,
// and the closing brace of this enum is not visible in this excerpt.
enum modetype {
RFC1738, /* Check whether the text is compliant with the RFC1738 appendix,
like "<URL:http://www.example.org/>". */
RFC2396E, /* RFC2396, Appendix E allows angle brackets (like
"<http://www.example.org/>") (without "URL:") or
quotation marks (like "\"http://www.example.org/\"").
Also allow email addresses without scheme,
e.g. "<user@example.org>" */
freetext, /* Assume a leading scheme
matching "[a-zA-Z][a-zA-Z0-9+\-\.]*:", like "news:"
(see RFC2396, Section 3.1).
Certain characters (see code) or any whitespace
(including linebreaks) end the URL.
Certain other (punctuation) characters (see code)
at the end are stripped off. */
abbreviated /* Similar to freetext, but without scheme, e.g.
"www.example.org", "ftp.example.org" and
"user@example.org". */
/* URLs of type RFC1738 and RFC2396E may span multiple lines;
whitespace is stripped. Special characters like ")" stay intact. */
* @param text (in), pos (in): see FindURL
* @param check (in): Start must conform to this mode
* @param start (out): Position in text, where URL (including brackets or
* similar) starts
* @return A start conforming to |check| has been found
// Searches for the start of a URL of kind `check` in aInString (aInLength
// long), relative to position pos.
// Output: start (non-const reference) receives the position where the URL,
// including brackets or similar, begins.
// Returns whether a start matching `check` was found.
PRBool FindURLStart(const PRUnichar * aInString, PRInt32 aInLength, const PRUint32 pos,
const modetype check, PRUint32& start);
* @param text (in), pos (in): see FindURL
* @param check (in): End must conform to this mode
* @param start (in): see FindURLStart
* @param end (out): Similar to |start| param of FindURLStart
* @return An end conforming to |check| has been found
// Counterpart of FindURLStart: searches for the end of a URL of kind `check`.
// start is an input (the value described for FindURLStart); end is the output
// (non-const reference).
// Returns whether an end matching `check` was found.
PRBool FindURLEnd(const PRUnichar * aInString, PRInt32 aInStringLength, const PRUint32 pos,
const modetype check, const PRUint32 start, PRUint32& end);
* @param text (in), pos (in), whathasbeendone (in): see FindURL
* @param check (in): Current mode
* @param start (in), end (in): see FindURLEnd
* @param txtURL (out): Guessed (raw) URL.
* Without whitespace, but not completed.
* @param desc (out): Link as shown to the user, but already escaped.
* Should be placed between the <a> and </a> tags.
* @param replaceBefore(out), replaceAfter (out): see FindURL
// Derives the raw URL text and its user-visible description from the
// start/end positions.
// Inputs: aInString, aInStringLength, pos, whathasbeendone, check, start, end.
// Outputs (non-const references):
//   txtURL        - guessed raw URL, without whitespace but not completed
//   desc          - link text as shown to the user, already escaped
//   replaceBefore - see FindURL
//   replaceAfter  - see FindURL
// Returns nothing; all results go through the output parameters.
void CalculateURLBoundaries(const PRUnichar * aInString, PRInt32 aInStringLength,
const PRUint32 pos, const PRUint32 whathasbeendone,
const modetype check, const PRUint32 start, const PRUint32 end,
nsString& txtURL, nsString& desc,
PRInt32& replaceBefore, PRInt32& replaceAfter);
* @param txtURL (in), desc (in): see CalculateURLBoundaries
* @param outputHTML (out): see FindURL
* @return A valid URL could be found (and creation of HTML successful)
// Validates txtURL and, if it is usable, writes the HTML for the link into
// outputHTML. txtURL and desc are the values produced by CalculateURLBoundaries.
// Returns whether a valid URL could be found and the HTML was created.
// NOTE(review): `mode` is not described in the accompanying documentation;
// presumably it is the modetype under which the URL was recognised -- confirm.
PRBool CheckURLAndCreateHTML(
const nsString& txtURL, const nsString& desc, const modetype mode,
nsString& outputHTML);
@param text (in): line of text possibly with tagTXT.<p>
if col0 is true,
starting with tagTXT<br>
else
starting one char before tagTXT
@param col0 (in): tagTXT is at the beginning of the line (or paragraph).
openTags must be 0 then.
@param tagTXT (in): Tag in plaintext to search for, e.g. "*"
@param aTagTxtLen (in): length of tagTXT.
@param tagHTML (in): HTML-Tag to replace tagTXT with,
without "<" and ">", e.g. "strong"
@param attributeHTML (in): HTML-attribute to add to opening tagHTML,
e.g. "class=txt_star"
@param aOutputString: string to APPEND the converted html into
@param openTags (in/out): Number of currently open tags of type tagHTML
@return Conversion succeeded
// Tries to convert a structured-phrase marker (tagTXT, e.g. "*") in aInString
// into the HTML tag tagHTML with the attribute attributeHTML.
// The generated HTML is appended to aOutputString. openTags is in/out and
// holds the number of currently open tags of type tagHTML.
// Returns whether the conversion succeeded.
PRBool StructPhraseHit(const PRUnichar * aInString, PRInt32 aInStringLength, PRBool col0,
const PRUnichar* tagTXT,
PRInt32 aTagTxtLen,
const char* tagHTML, const char* attributeHTML,
nsString& aOutputString, PRUint32& openTags);
@param text (in), col0 (in): see GlyphHit
@param tagTXT (in): Smiley, see also StructPhraseHit
@param aTagTxtLen (in): length of tagTXT
@param tagHTML (in): see StructPhraseHit
@param outputHTML (out): new string containing the html for the smiley
@param glyphTextLen (out): see GlyphHit
// Checks aInString (aLength long) for the smiley text tagTXT.
// Outputs (non-const references): outputHTML receives the HTML for the smiley,
// glyphTextLen has the meaning described for GlyphHit.
// NOTE(review): the return value is not documented here; presumably it reports
// whether the smiley was found -- confirm in the implementation.
PRBool SmilyHit(const PRUnichar * aInString, PRInt32 aLength, PRBool col0,
const PRUnichar* tagTXT, PRInt32 aTagTxtLen, const char* tagHTML,
nsString& outputHTML, PRInt32& glyphTextLen);
Checks whether we can replace some chars at the start of the line with prettier HTML.
If success is reported, replace the first glyphTextLen chars with the HTML appended to aOutString.
@param text (in): line of text possibly with Glyph.<p>
If col0 is true,
starting with Glyph <br><!-- (br not part of text) -->
else
starting one char before Glyph
@param col0 (in): text starts at the beginning of the line (or paragraph)
@param aOutString (out): APPENDS html for the glyph to this string
@param glyphTextLen (out): Length of original text to replace
@return see StructPhraseHit
// Checks whether the start of aInString (aInLength long) can be replaced by a
// glyph. col0 tells whether the text starts at the beginning of the line
// (or paragraph).
// Outputs (non-const references): the glyph HTML is appended to aOutString,
// and glyphTextLen receives the length of the original text to replace.
// The return value has the same meaning as for StructPhraseHit.
PRBool GlyphHit(const PRUnichar * aInString, PRInt32 aInLength, PRBool col0,
nsString& aOutString, PRInt32& glyphTextLen);
// It's said that Win32 and Mac don't like static const members, so these are
// declared as plain constants carrying the class name as a prefix.
// NOTE(review): only four modetype enumerators (RFC1738, RFC2396E, freetext,
// abbreviated) are visible in this excerpt, and the "unknown" mode mentioned
// below is not among them -- confirm that both values match the full enum.
const PRInt32 mozTXTToHTMLConv_lastMode = 4;
// Needed (only) by mozTXTToHTMLConv::FindURL
const PRInt32 mozTXTToHTMLConv_numberOfModes = 4; // ditto; unknown not counted