pjs/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The "License" shall be the Mozilla Public License Version 1.1, except
 * Sections 6.2 and 11, but with the addition of the below defined Section 14.
 * You may obtain a copy of the Mozilla Public License Version 1.1 at
 * <http://www.mozilla.org/MPL/>. The contents of this file are subject to the
 * License; you may not use this file except in compliance with the License.
 *
 * Section 14: MISCELLANEOUS.
 * This License represents the complete agreement concerning subject matter
 * hereof. If any provision of this License is held to be unenforceable, such
 * provision shall be reformed only to the extent necessary to make it
 * enforceable. This License shall be governed by German law provisions. Any
 * litigation relating to this License shall be subject to German jurisdiction.
 *
 * Once Covered Code has been published under a particular version of the
 * License, You may always continue to use it under the terms of that version.
 * The Initial Developer and no one else has the right to modify the terms
 * applicable to Covered Code created under this License.
 * (End of Section 14)
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is the Text to HTML converter code.
 *
 * The Initial Developer of the Original Code is Ben Bucksch
 * <http://www.bucksch.org>. Portions created by Ben Bucksch are Copyright
 * (C) 1999 Ben Bucksch. All Rights Reserved.
 *
 * Contributor(s):
 */

#include "mozTXTToHTMLConv.h"
#include "nsIIOService.h"
#include "nsIServiceManager.h"

// Returns the tail of |text|: asks the string for its rightmost
// (Length() - start) characters, i.e. everything from index |start| on.
// The call is bracketed by the mRightTimer start/stop macros.
static nsAutoString
Right(const nsAutoString& text, PRUint32 start)
{
  MOZ_TIMER_START(mRightTimer);

  nsAutoString tail;
  const PRUint32 tailLength = text.Length() - start;
  text.Right(tail, tailLength);

  MOZ_TIMER_STOP(mRightTimer);
  return tail;
}

// Maps one character to its HTML-safe form: '<', '>' and '&' become the
// entities "&lt;", "&gt;" and "&amp;"; every other character is returned
// unchanged (as a one-character string).
nsAutoString
mozTXTToHTMLConv::EscapeChar(const PRUnichar ch)
{
  if (ch == '<')
    return "&lt;";
  if (ch == '>')
    return "&gt;";
  if (ch == '&')
    return "&amp;";

  return ch;
}

// Builds a new string by running every character of |aString| through
// EscapeChar() and concatenating the results in order.
nsAutoString
mozTXTToHTMLConv::EscapeStr(const nsAutoString& aString)
{
  nsAutoString escaped;
  PRUint32 index = 0;
  while (PRInt32(index) < aString.Length())
  {
    escaped += EscapeChar(aString[index]);
    index++;
  }
  return escaped;
}

// Inverse of EscapeStr() for the three entities it produces: "&lt;", "&gt;"
// and "&amp;" are turned back into '<', '>' and '&'. Any other '&' and all
// remaining characters are copied through unchanged.
nsAutoString
mozTXTToHTMLConv::UnescapeStr(const nsAutoString& aString)
{
  nsAutoString unescaped;
  PRUint32 cursor = 0;
  while (PRInt32(cursor) < aString.Length())
  {
    const PRUnichar current = aString[cursor];
    if (current == '&')
    {
      nsAutoString entity;

      // Four-character entities first.
      aString.Mid(entity, cursor, 4);
      if (entity == "&lt;")
      {
        unescaped += '<';
        cursor += 4;
        continue;
      }
      if (entity == "&gt;")
      {
        unescaped += '>';
        cursor += 4;
        continue;
      }

      // Then the five-character one.
      aString.Mid(entity, cursor, 5);
      if (entity == "&amp;")
      {
        unescaped += '&';
        cursor += 5;
        continue;
      }
      // Unknown entity: fall through and copy the '&' literally.
    }

    unescaped += current;
    cursor++;
  }
  return unescaped;
}

// Turns an abbreviated URL into a full one by prepending a scheme.
//   text - the abbreviated URL candidate
//   pos  - index inside |text| of the character that triggered the check
// If text[pos] is '@' the result is "mailto:" + text. If it is '.', the
// result is "http://" + text when |text| begins with "www.", or
// "ftp://" + text when it begins with "ftp.". In every other case an empty
// string is returned.
nsAutoString
mozTXTToHTMLConv::CompleteAbbreviatedURL(const nsAutoString& text,
                                         const PRUint32 pos)
{
  nsAutoString completed;
  const PRUnichar trigger = text[pos];

  if (trigger == '@')
  {
    completed = "mailto:";
    completed += text;
    return completed;
  }

  if (trigger != '.')
    return completed;

  // Pick the scheme from the host-name prefix, if it is one we know.
  const char* scheme = nsnull;
  if (ItMatchesDelimited(text, "www.", LT_IGNORE, LT_IGNORE))
    scheme = "http://";
  else if (ItMatchesDelimited(text, "ftp.", LT_IGNORE, LT_IGNORE))
    scheme = "ftp://";

  if (scheme)
  {
    completed = scheme;
    completed += text;
  }
  return completed;
}

// Searches backwards from |pos| for the first character of a URL.
//   text  - the text being scanned
//   pos   - index of the trigger character (':', '.' or '@')
//   check - which URL notation to look for
//   start - out: index of the first URL character (set only on success)
// Returns PR_TRUE if a plausible start for notation |check| was found.
//
// Per mode:
//   RFC1738     - the five characters ending at |pos| must be "<URL:".
//   RFC2396E    - walk back to the nearest '<' or '"' (a '>' aborts).
//   freetext    - walk back over scheme characters (alpha, digit, '+', '-',
//                 '.'); the scheme must begin with a letter.
//   abbreviated - walk back to the nearest delimiter/whitespace; the
//                 candidate must begin with a letter or digit and must
//                 begin before |pos|.
PRBool
mozTXTToHTMLConv::FindURLStart(const nsAutoString& text, const PRUint32 pos,
                               const modetype check, PRUint32& start)
{
  switch(check)
  { // no breaks, because end of blocks is never reached
  case RFC1738:
  {
    nsAutoString temp;
    text.Mid(temp, MaxInt(pos - 4, 0), 5);
    if (temp == "<URL:")
    {
      start = pos + 1;
      return PR_TRUE;
    }
    else
      return PR_FALSE;
  }
  case RFC2396E:
  {
    PRInt32 i = pos - 1;
    for (; i >= 0
             && text[PRUint32(i)] != '<'
             && text[PRUint32(i)] != '"'
             && text[PRUint32(i)] != '>'
         ; i--)
      ;
    if (i >= 0 && (text[PRUint32(i)] == '<' || text[PRUint32(i)] == '"'))
    {
      start = PRUint32(++i);
      return PR_TRUE;
    }
    else
      return PR_FALSE;
  }
  case freetext:
  {
    PRInt32 i = pos - 1;
    for (; i >= 0 && (
         nsString::IsAlpha(text[PRUint32(i)]) ||
         nsString::IsDigit(text[PRUint32(i)]) ||
         text[PRUint32(i)] == '+' ||
         text[PRUint32(i)] == '-' ||
         text[PRUint32(i)] == '.'
         ); i--)
      ;
    if (nsString::IsAlpha(text[PRUint32(++i)]))
    {
      start = PRUint32(i);
      return PR_TRUE;
    }
    else
      return PR_FALSE;
  }
  case abbreviated:
  {
    // Start on the character *before* the trigger. Starting after it (as
    // the code used to) stops immediately when that character is a
    // delimiter and then reports a start position behind |pos|, which later
    // underflows in unsigned "pos - start" arithmetic.
    PRInt32 i = PRInt32(pos) - 1;
    for (; i >= 0
             && text[PRUint32(i)] != '>' && text[PRUint32(i)] != '<'
             && text[PRUint32(i)] != '"' && text[PRUint32(i)] != '\''
             && text[PRUint32(i)] != '`' && text[PRUint32(i)] != ','
             && text[PRUint32(i)] != '{' && text[PRUint32(i)] != '['
             && text[PRUint32(i)] != '(' && text[PRUint32(i)] != '|'
             && text[PRUint32(i)] != '\\'
             && !nsString::IsSpace(text[PRUint32(i)])
         ; i--)
      ;
    ++i;  // step forward onto the first character of the candidate
    if
      (
        PRUint32(i) < pos  // there must be something in front of the trigger
          &&
          (
            nsString::IsAlpha(text[PRUint32(i)]) ||
            nsString::IsDigit(text[PRUint32(i)])
          )
      )
    {
      start = PRUint32(i);
      return PR_TRUE;
    }
    else
      return PR_FALSE;
  }
  default:
    return PR_FALSE;
  } //switch
}

// Searches forwards from |pos| for the last character of a URL.
//   text  - the text being scanned
//   pos   - index of the trigger character
//   check - which URL notation is being matched
//   start - index of the first URL character (from FindURLStart)
//   end   - out: index of the last URL character (set only on success)
// Returns PR_TRUE if a non-empty URL tail after |pos| was found.
//
// Per mode:
//   RFC1738 / RFC2396E - scan to the next '>', '"' or '<'. That character
//       must be the closing partner of the opening delimiter ('>' for
//       "<URL:" and '<', '"' for '"').
//   freetext / abbreviated - scan to the next delimiter or whitespace, then
//       drop trailing punctuation (. , ; ! ? -) which more likely belongs
//       to the surrounding sentence than to the URL.
PRBool
mozTXTToHTMLConv::FindURLEnd(const nsAutoString& text, const PRUint32 pos,
           const modetype check, const PRUint32 start, PRUint32& end)
{
  switch(check)
  { // no breaks, because end of blocks is never reached
  case RFC1738:
  case RFC2396E:
  {
    PRUint32 i = pos + 1;
    for (; PRInt32(i) < text.Length()
             && text[i] != '>'
             && text[i] != '"'
             && text[i] != '<'
           ; i++)
      ;
    // The scan may have run off the end of the text without meeting any
    // delimiter; only look at text[i] when it really exists.
    if (PRInt32(i) < text.Length()
        && text[i] == (check == RFC1738 || text[start - 1] == '<' ? '>' : '"')
        && --i != pos)
    {
      end = i;
      return PR_TRUE;
    }
    else
      return PR_FALSE;
  }
  case freetext:
  case abbreviated:
  {
    PRUint32 i = pos + 1;
    for (; PRInt32(i) < text.Length()
             && text[i] != '>' && text[i] != '<'
             && text[i] != '"' && text[i] != '\''
             && text[i] != '`' && text[i] != ','
             && text[i] != '}' && text[i] != ']'
             && text[i] != ')' && text[i] != '|'
             && !nsString::IsSpace(text[i])
         ; i++)
      ;
    // Step back over trailing punctuation.
    while (--i > pos && (
             text[i] == '.' || text[i] == ',' || text[i] == ';' ||
             text[i] == '!' || text[i] == '?' || text[i] == '-'
             ))
        ;
    if (i > pos)
    {
      end = i;
      return PR_TRUE;
    }
    else
      return PR_FALSE;
  }
  default:
    return PR_FALSE;
  } //switch
}

// Derives, from the URL boundaries found by FindURLStart/FindURLEnd, the
// pieces needed to emit the link.
//   text            - the text being scanned
//   pos             - index of the trigger character
//   whathasbeendone - conversion flags already applied to the output so far
//   check           - URL notation that matched
//   start, end      - first / last index of the URL proper
//   txtURL          - out: the URL text with all whitespace stripped
//   desc            - out: HTML-escaped link text (includes the enclosing
//                     "<URL:"/">" or brackets for the RFC notations)
//   replaceBefore   - out: number of already-emitted output characters in
//                     front of |pos| that the link replaces
//   replaceAfter    - out: number of input characters after |pos| that the
//                     link replaces
void
mozTXTToHTMLConv::CalculateURLBoundaries(const nsAutoString& text,
     const PRUint32 pos, const PRUint32 whathasbeendone,
     const modetype check, const PRUint32 start, const PRUint32 end,
     nsAutoString& txtURL, nsAutoString& desc,
     PRInt32& replaceBefore, PRInt32& replaceAfter)
{
  // Defaults to the URL start so the value is defined even for a mode this
  // switch does not know about.
  PRUint32 descstart = start;

  switch(check)
  {
  case RFC1738:
  {
    descstart = start - 5;
    text.Mid(desc, descstart, end - descstart + 2); // include "<URL:" and ">"
    replaceAfter = end - pos + 1;
  } break;
  case RFC2396E:
  {
    descstart = start - 1;
    text.Mid(desc, descstart, end - descstart + 2); // include brackets
    replaceAfter = end - pos + 1;
  } break;
  case freetext:
  case abbreviated:
  {
    descstart = start;
    text.Mid(desc, descstart, end - start + 1);   // don't include brackets
    replaceAfter = end - pos;
  } break;
  default:
    // Unknown mode: nothing after |pos| is claimed.
    replaceAfter = 0;
    break;
  } //switch

  desc = EscapeStr(desc);

  text.Mid(txtURL, start, end - start + 1);
  txtURL.StripWhitespace();

  // The part of the link text in front of |pos| has already been written to
  // the output, possibly expanded by earlier conversions. Re-run those
  // conversions (minus URL detection, which would recurse forever) to learn
  // how many output characters it occupies.
  nsAutoString temp;
  text.Mid(temp, descstart, pos - descstart);
  replaceBefore = ScanTXT(temp, ~kURLs /*prevents loop*/
       & whathasbeendone).Length();

  return;
}

// Validates |txtURL| by asking the IO service to build a URI from it and,
// if that succeeds, writes an HTML anchor into |outputHTML|.
//   txtURL     - candidate URL (plain text, not HTML-escaped)
//   desc       - link text, already HTML-escaped by the caller
//   outputHTML - out: "<a href="URL">desc</a>" on success
// Returns PR_TRUE when the anchor was produced, PR_FALSE when the IO service
// is unavailable, memory ran out, or the URL was rejected.
PRBool
mozTXTToHTMLConv::CheckURLAndCreateHTML(
     const nsAutoString& txtURL, const nsAutoString& desc,
     nsAutoString& outputHTML)
{
  // Create *uri from txtURL
  nsIURI* uri = nsnull;  // stays null if NewURI never assigns it
  nsresult rv;
  static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID);
  NS_WITH_SERVICE(nsIIOService, serv, kIOServiceCID, &rv);
  if (NS_FAILED(rv))
    return PR_FALSE;
  char* specStr = txtURL.ToNewCString(); //I18N this forces a single byte char
  if (specStr == nsnull)
    return PR_FALSE;
  rv = serv->NewURI(specStr, nsnull, &uri);
  Recycle(specStr);

  // Real work
  if (NS_SUCCEEDED(rv) && uri)
  {
    //PRUnichar* validURL;
    //uri->ToString(&validURL);

    outputHTML = "<a href=\"";
    //outputHTML += validURL;
    // The URL goes into an HTML attribute, so '&' (common in query strings)
    // and any '<' / '>' must be written as entities, just like |desc|.
    outputHTML += EscapeStr(txtURL);
    outputHTML += "\">";
    outputHTML += desc;
    outputHTML += "</a>";
    //Recycle(validURL);
    NS_RELEASE(uri);
    return PR_TRUE;
  }
  else
    return PR_FALSE;
}

// Tries to recognise a URL around the trigger character text[pos] and, if
// one is found, produces the HTML for it.
//   text            - the text being scanned
//   pos             - index of the trigger character (':', '.' or '@')
//   whathasbeendone - conversion flags already applied to the output
//   outputHTML      - out: the anchor markup
//   replaceBefore   - out: output characters before |pos| to replace
//   replaceAfter    - out: input characters after |pos| to replace
// Returns PR_TRUE on success; the out-parameters are only meaningful then.
//
// The notations are tried in the order given by |ranking|; the first one
// that passes start detection, end detection and URI validation wins.
PRBool
mozTXTToHTMLConv::FindURL(const nsAutoString& text, const PRUint32 pos,
     const PRUint32 whathasbeendone,
     nsAutoString& outputHTML, PRInt32& replaceBefore, PRInt32& replaceAfter)
{
  enum statetype {unchecked, invalid, startok, endok, success};
  const modetype ranking[mozTXTToHTMLConv_numberOfModes] =
                      {RFC1738, RFC2396E, freetext, abbreviated};

  statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
  /* I don't like this abuse of enums as index for the array,
     but I don't know a better method */

  // Define, which modes to check
  /* all modes but abbreviated are checked for text[pos] == ':',
     only abbreviated for '.', RFC2396E and abbreviated for '@' */
  for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
       iState = modetype(iState + 1))
    state[iState] = text[pos] == ':' ? unchecked : invalid;
  switch (text[pos])
  {
  case '@':
    state[RFC2396E] = unchecked;
    // no break here
  case '.':
    state[abbreviated] = unchecked;
    break;
  case ':':
    state[abbreviated] = invalid;
    break;
  default:
    break;
  }

  // Test, first successful mode wins, sequence defined by |ranking|
  PRInt32 iCheck = 0;  // the currently tested modetype
  modetype check = ranking[iCheck];
  for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
       iCheck++)
    /* check state from last run.
       If this is the first, check this one, which isn't = success yet */
  {
    check = ranking[iCheck];

    // Only read after the matching Find* call has filled them in; zeroed so
    // they never hold indeterminate values.
    PRUint32 start = 0, end = 0;

    if (state[check] == unchecked)
      if (FindURLStart(text, pos, check, start))
        state[check] = startok;

    if (state[check] == startok)
      if (FindURLEnd(text, pos, check, start, end))
        state[check] = endok;

    if (state[check] == endok)
    {
      nsAutoString txtURL, desc;
      PRInt32 resultReplaceBefore, resultReplaceAfter;

      CalculateURLBoundaries(text, pos, whathasbeendone, check, start, end,
                             txtURL, desc,
                             resultReplaceBefore, resultReplaceAfter);

      // Scheme completion only applies to '.'/'@' triggers. For ':' the URL
      // already carries its scheme, and in RFC1738 mode |start| lies behind
      // |pos|, so "pos - start" would wrap around to a huge index.
      if (text[pos] != ':')
      {
        nsAutoString temp;
        temp = CompleteAbbreviatedURL(txtURL, pos - start);
        if (!temp.IsEmpty())
          txtURL = temp;
      }

      if (CheckURLAndCreateHTML(txtURL, desc, outputHTML))
      {
        replaceBefore = resultReplaceBefore;
        replaceAfter = resultReplaceAfter;
        state[check] = success;
      }
    } // if
  } // for
  return state[check] == success;
}

// Tests whether |text| begins with |rep|, optionally preceded by one
// character of class |before| and followed by one character of class
// |after|.
//   text   - candidate text; when |before| is not LT_IGNORE its first
//            character is the "before" character and |rep| is expected at
//            index 1, otherwise |rep| is expected at index 0
//   rep    - the string to look for (compared case-insensitively)
//   before - required class of the preceding character
//   after  - required class of the following character
// LT_ALPHA / LT_DIGIT demand a letter / digit. LT_DELIMITER demands a
// character that is neither letter nor digit nor the first character of
// |rep|. LT_IGNORE imposes no requirement.
PRBool
mozTXTToHTMLConv::ItMatchesDelimited(const nsAutoString& text,
    const char* rep, LIMTYPE before, LIMTYPE after)
{
  PRInt32 repLen = rep ? nsCRT::strlen(rep) : 0;

  const PRBool ignoreBefore = (before == LT_IGNORE);
  // An LT_DELIMITER after-check is also satisfied by the end of the text,
  // so it does not add to the minimum length.
  const PRBool looseAfter = (after == LT_IGNORE || after == LT_DELIMITER);

  // Length requirements: |rep| itself, plus one character for each side
  // that has to be inspected.
  if (ignoreBefore && looseAfter && text.Length() < repLen)
    return PR_FALSE;
  if ((!ignoreBefore || !looseAfter) && text.Length() < repLen + 1)
    return PR_FALSE;
  if (!ignoreBefore && !looseAfter && text.Length() < repLen + 2)
    return PR_FALSE;

  PRUint32 afterPos = repLen + (ignoreBefore ? 0 : 1);

  // Character in front of |rep|.
  if (before == LT_ALPHA && !nsString::IsAlpha(text.First()))
    return PR_FALSE;
  if (before == LT_DIGIT && !nsString::IsDigit(text.First()))
    return PR_FALSE;
  if (before == LT_DELIMITER
      && (nsString::IsAlpha(text.First()) ||
          nsString::IsDigit(text.First()) ||
          text.First() == *rep))
    return PR_FALSE;

  // Character behind |rep|.
  if (after == LT_ALPHA && !nsString::IsAlpha(text[afterPos]))
    return PR_FALSE;
  if (after == LT_DIGIT && !nsString::IsDigit(text[afterPos]))
    return PR_FALSE;
  if (after == LT_DELIMITER
      && (nsString::IsAlpha(text[afterPos]) ||
          nsString::IsDigit(text[afterPos]) ||
          text[afterPos] == *rep))
    return PR_FALSE;

  // Finally |rep| itself, skipping the "before" character if there is one.
  if (!(ignoreBefore ? text : Right(text, 1)).Equals(rep, PR_TRUE, repLen))
    return PR_FALSE;

  return PR_TRUE;
}

// Counts the offsets in |text| at which ItMatchesDelimited() succeeds for
// |rep| with the given |before| / |after| character classes. Every suffix
// of |text| is tested once.
PRUint32
mozTXTToHTMLConv::NumberOfMatches(const nsAutoString& text,
     const char* rep, LIMTYPE before, LIMTYPE after)
{
  PRInt32 matches = 0;
  PRInt32 offset = 0;
  while (offset < text.Length())
  {
    if (ItMatchesDelimited(Right(text, offset), rep, before, after))
      matches++;
    offset++;
  }
  return matches;
}

// Recognises the opening or closing marker of a structured phrase such as
// *bold* and produces the corresponding HTML tag.
//   text          - text starting at the marker (col0) or one character
//                   before it (otherwise)
//   col0          - PR_TRUE if the marker is the very first character
//   tagTXT        - the plain-text marker, e.g. "*"
//   tagHTML       - the HTML element name, e.g. "strong"
//   attributeHTML - attribute text placed inside the opening tag
//   outputHTML    - out: replacement markup (the marker itself is kept)
//   openTags      - in/out: number of currently open tags of this kind
// Returns PR_TRUE if a tag was produced.
//
// The pattern searched for is
//   LT_DELIMITER - marker - ALPHA -
//   [ some text (maybe more marker pairs) - ALPHA ] marker - LT_DELIMITER.
// An opening tag is only emitted if enough closing markers follow to pair
// it up; the first possible opening/closing marker is used.
PRBool
mozTXTToHTMLConv::StructPhraseHit(const nsAutoString& text, PRBool col0,
     const char* tagTXT,
     const char* tagHTML, const char* attributeHTML,
     nsAutoString& outputHTML, PRUint32& openTags)
{
  // Opening marker: delimiter (or line start) in front, a letter behind,
  // and more closing markers ahead than tags already open.
  const PRBool opens =
      ItMatchesDelimited(text, tagTXT,
           (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA)
        && NumberOfMatches((col0 ? text : Right(text, 1)), tagTXT,
             LT_ALPHA, LT_DELIMITER) > openTags;

  if (opens)
  {
    openTags++;
    outputHTML = "<";
    outputHTML += tagHTML;
    outputHTML += ' ';
    outputHTML += attributeHTML;
    outputHTML += '>';
    outputHTML += tagTXT;
    return PR_TRUE;
  }

  // Closing marker: only if something is open, with a letter in front and
  // a delimiter behind.
  if (openTags > 0
       && ItMatchesDelimited(text, tagTXT, LT_ALPHA, LT_DELIMITER))
  {
    openTags--;
    outputHTML = tagTXT;
    outputHTML += "</";
    outputHTML += tagHTML;
    outputHTML += '>';
    return PR_TRUE;
  }

  return PR_FALSE;
}

// Checks whether |text| starts with the smiley |tagTXT| and, if so, returns
// its HTML replacement.
//   text         - text starting at the smiley (col0) or at the character
//                  in front of it (otherwise)
//   col0         - PR_TRUE if the smiley is expected at index 0
//   tagTXT       - the plain-text smiley, e.g. ":-)"
//   tagHTML      - the markup to substitute
//   outputHTML   - out: replacement (prefixed with a space when !col0)
//   glyphTextLen - out: number of input characters consumed
// A smiley counts only if it is preceded by whitespace (or is at col0) and
// followed by the end of the text, by whitespace, or by one of . , ; ! ?
// which in turn is followed by whitespace.
PRBool
mozTXTToHTMLConv::SmilyHit(const nsAutoString& text, PRBool col0,
         const char* tagTXT, const char* tagHTML,
         nsAutoString& outputHTML, PRInt32& glyphTextLen)
{
  const PRInt32  tagLen = nsCRT::strlen(tagTXT);
  const PRInt32  txtLen = text.Length();

  // Index of the first character behind the smiley.
  const PRUint32 delim = (col0 ? 0 : 1) + tagLen;

  // Leading side.
  if (!col0 && !nsString::IsSpace(text.First()))
    return PR_FALSE;

  // Trailing side.
  PRBool trailingOk =
      txtLen <= PRInt32(delim) || nsString::IsSpace(text[delim]);
  if (!trailingOk && txtLen > PRInt32(delim + 1))
  {
    const PRBool punctuation =
        text[delim] == '.' ||
        text[delim] == ',' ||
        text[delim] == ';' ||
        text[delim] == '!' ||
        text[delim] == '?';
    trailingOk = punctuation && nsString::IsSpace(text[delim + 1]);
  }
  if (!trailingOk)
    return PR_FALSE;

  // The smiley itself.
  // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
  if (!ItMatchesDelimited(text, tagTXT,
                          col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE))
    return PR_FALSE;

  if (col0)
  {
    outputHTML = tagHTML;
  }
  else
  {
    // Keep the separating character as a plain space.
    outputHTML.Truncate();
    outputHTML += ' ';
    outputHTML += tagHTML;
  }
  glyphTextLen = (col0 ? 0 : 1) + tagLen;
  return PR_TRUE;
}

// Checks whether |text| starts with a plain-text glyph (smiley, "(c)",
// "(r)", "+/-", "x^2") and, if so, returns the HTML that replaces it.
//   text         - text starting at the current scan position
//   col0         - PR_TRUE if that position is the start of the scanned text
//   outputHTML   - out: replacement markup
//   glyphTextLen - out: number of input characters the replacement covers
// Returns PR_TRUE on a hit; the out-parameters are only meaningful then.
// The whole function is bracketed by mGlyphHitTimer, which is stopped on
// every exit path.
PRBool
mozTXTToHTMLConv::GlyphHit(const nsAutoString& text, PRBool col0,
         nsAutoString& outputHTML, PRInt32& glyphTextLen)
{
  MOZ_TIMER_START(mGlyphHitTimer);

  // Smileys. At col0 the smiley starts at index 0, otherwise at index 1
  // (index 0 being the whitespace in front of it).
  if
    (
      (  // Performance increase
        (col0 ? text.First() : text[1]) == ':' ||
        (col0 ? text.First() : text[1]) == ';'
      )
        &&
        (
          SmilyHit(text, col0, ":-)", "<img SRC=\"chrome://messenger/skin/smile.gif\" alt=\":-)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
          SmilyHit(text, col0, ":)", "<img SRC=\"chrome://messenger/skin/smile.gif\" alt=\":)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
          SmilyHit(text, col0, ":-(", "<img SRC=\"chrome://messenger/skin/frown.gif\" alt=\":-(\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
          SmilyHit(text, col0, ":(", "<img SRC=\"chrome://messenger/skin/frown.gif\" alt=\":(\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
          SmilyHit(text, col0, ";-)", "<img SRC=\"chrome://messenger/skin/wink.gif\" alt=\";-)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
          SmilyHit(text, col0, ";-P", "<img SRC=\"chrome://messenger/skin/sick.gif\" alt=\";-P\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen)
        )
    )
  {
    MOZ_TIMER_STOP(mGlyphHitTimer);
    return PR_TRUE;
  }
  // At col0 a smiley may also sit at index 1 behind a leading whitespace
  // character; retry with the not-at-col0 rules.
  if   // XXX Hotfix
    (
      col0    // Performance increase
        &&
        (
          text[1] == ':' ||
          text[1] == ';'
        )
        &&
        (
          SmilyHit(text, PR_FALSE, ":-)", "<img SRC=\"chrome://messenger/skin/smile.gif\" alt=\":-)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
          SmilyHit(text, PR_FALSE, ":)", "<img SRC=\"chrome://messenger/skin/smile.gif\" alt=\":)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
          SmilyHit(text, PR_FALSE, ":-(", "<img SRC=\"chrome://messenger/skin/frown.gif\" alt=\":-(\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
          SmilyHit(text, PR_FALSE, ":(", "<img SRC=\"chrome://messenger/skin/frown.gif\" alt=\":(\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
          // alt text must repeat the smiley that was typed (was ";-P")
          SmilyHit(text, PR_FALSE, ";-)", "<img SRC=\"chrome://messenger/skin/wink.gif\" alt=\";-)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
          SmilyHit(text, PR_FALSE, ";-P", "<img SRC=\"chrome://messenger/skin/sick.gif\" alt=\";-P\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen)
        )
    )
  {
    MOZ_TIMER_STOP(mGlyphHitTimer);
    return PR_TRUE;
  }

  // (c) and (r)
  if (text.First() == '(')
  {
    if (ItMatchesDelimited(text, "(c)", LT_IGNORE, LT_DELIMITER))
         // Note: ItMatchesDelimited compares case-insensitive
    {
      outputHTML = "&copy;";
      glyphTextLen = 3;
      MOZ_TIMER_STOP(mGlyphHitTimer);
      return PR_TRUE;
    }
    if (ItMatchesDelimited(text, "(r)", LT_IGNORE, LT_DELIMITER))
         // see above
    {
      outputHTML = "&reg;";
      glyphTextLen = 3;
      MOZ_TIMER_STOP(mGlyphHitTimer);
      return PR_TRUE;
    }
  }

  // +/- (after a space, or at the very start)
  if (ItMatchesDelimited(text, " +/-", LT_IGNORE, LT_IGNORE))
  {
    outputHTML = " &plusmn;";
    glyphTextLen = 4;
    MOZ_TIMER_STOP(mGlyphHitTimer);
    return PR_TRUE;
  }
  if (col0 && ItMatchesDelimited(text, "+/-", LT_IGNORE, LT_IGNORE))
  {
    outputHTML = "&plusmn;";
    glyphTextLen = 3;
    MOZ_TIMER_STOP(mGlyphHitTimer);
    return PR_TRUE;
  }

  // Exponents: a digit, a letter or ')' followed by '^' and digits.
  if    // x^2 -> sup
    (
      text[1] == '^' // Performance increase
        &&
        (
          ItMatchesDelimited(text, "^", LT_DIGIT, LT_DIGIT) ||
          ItMatchesDelimited(text, "^", LT_ALPHA, LT_DIGIT) ||
          ItMatchesDelimited(Right(text, 1), "^", LT_IGNORE, LT_DIGIT)
            && text.First() == ')'
        )
    )
  {
    // Find first non-digit
    PRInt32 delimPos = 3;  // 3 = Position after first digit after "^"
    for (; delimPos < text.Length() &&
         nsString::IsDigit(text[PRUint32(delimPos)]); delimPos++)
      ;
    // Note: (delimPos == text.Length()) could be true

    // Digits running straight into a letter are not an exponent.
    if (nsString::IsAlpha(text[PRUint32(delimPos)]))
    {
      MOZ_TIMER_STOP(mGlyphHitTimer);
      return PR_FALSE;
    }

    outputHTML.Truncate();
    outputHTML += text.First();
    outputHTML += "<sup>";
    nsAutoString temp;
    // Give up if the digits could not be copied in full.
    if (text.Mid(temp, 2, delimPos - 2) != PRUint32(delimPos - 2))
    {
      MOZ_TIMER_STOP(mGlyphHitTimer);
      return PR_FALSE;
    }
    outputHTML += temp;
    outputHTML += "</sup>";
    glyphTextLen = delimPos /* - 1 + 1 */ ;
    MOZ_TIMER_STOP(mGlyphHitTimer);
    return PR_TRUE;
  }
  /*
   The following strings are not substituted:
   |TXT   |HTML     |Reason
   +------+---------+----------
    ->     &rarr;    Bug #454
    =>     &rArr;    ditto
    <-     &larr;    ditto
    <=     &lArr;    ditto
    (tm)   &trade;   ditto
    1/4    &frac14;  is triggered by 1/4 Part 1, 2/4 Part 2, ...
    3/4    &frac34;  ditto
    1/2    &frac12;  similar
  */
  MOZ_TIMER_STOP(mGlyphHitTimer);
  return PR_FALSE;
}

/***************************************************************************
  Library-internal Interface
****************************************************************************/

// Constructor: runs the nsISupports initialisation macro, resets the four
// instrumentation timers used in this file and then starts mTotalMimeTime.
// NOTE(review): the MOZ_TIMER_* macros are defined elsewhere; presumably
// they compile to nothing unless performance metrics are enabled - confirm.
mozTXTToHTMLConv::mozTXTToHTMLConv()
{
  NS_INIT_ISUPPORTS();
  MOZ_TIMER_RESET(mScanTXTTimer);
  MOZ_TIMER_RESET(mGlyphHitTimer);
  MOZ_TIMER_RESET(mRightTimer);
  MOZ_TIMER_RESET(mTotalMimeTime);
  // Runs from construction on; see the destructor for the report.
  MOZ_TIMER_START(mTotalMimeTime);
}

// Destructor: stops the total-time timer that the constructor started and
// logs the accumulated readings of all instrumentation timers.
mozTXTToHTMLConv::~mozTXTToHTMLConv()
{
  // mTotalMimeTime has been running since construction; it has to be
  // stopped (not started a second time) before its value is printed.
  MOZ_TIMER_STOP(mTotalMimeTime);
  MOZ_TIMER_DEBUGLOG(("MIME Total Processing Time: "));
  MOZ_TIMER_PRINT(mTotalMimeTime);

  MOZ_TIMER_DEBUGLOG(("mozTXTToHTMLConv::ScanTXT(): "));
  MOZ_TIMER_PRINT(mScanTXTTimer);

  MOZ_TIMER_DEBUGLOG(("mozTXTToHTMLConv::GlyphHit(): "));
  MOZ_TIMER_PRINT(mGlyphHitTimer);

  MOZ_TIMER_DEBUGLOG(("mozTXTToHTMLConv::Right(): "));
  MOZ_TIMER_PRINT(mRightTimer);
}

// Invokes the NS_IMPL_ISUPPORTS helper macro for this class, passing the
// class's own IID.
// NOTE(review): presumably this expands to the AddRef / Release /
// QueryInterface implementations - confirm against the macro definition.
NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, NS_GET_IID(mozTXTToHTMLConv));

// Counts the quote markers at the beginning of a plain-text line.
//   line         - the line to inspect
//   logLineStart - in: index at which to begin looking;
//                  out: index just behind the last quote marker counted
// Returns the number of quote levels found.
//
// Each level consists of optional whitespace, optional letters and then
// '>' or ']'. E.g. all of the following count as one level:
//
//    >text
//    > text
//    ] text
//        > text
//    USER> text
//    user] text
//
// A line beginning with ">From " is not a quote (mail software may escape
// "From " that way), so counting stops there.
PRInt32
mozTXTToHTMLConv::CiteLevelTXT(const nsAutoString& line,
                               PRUint32& logLineStart)
{
  PRInt32 level = 0;

  for (;;)
  {
    PRUint32 cursor = logLineStart;

    // Skip indentation, then an optional name in front of the marker.
    while (PRInt32(cursor) < line.Length() && nsString::IsSpace(line[cursor]))
      cursor++;
    while (PRInt32(cursor) < line.Length() && nsString::IsAlpha(line[cursor]))
      cursor++;

    if (line[cursor] != '>' && line[cursor] != ']')
      break;                     // no further quote marker

    // Sendmail
    nsAutoString prefix;
    line.Mid(prefix, logLineStart, 6);
    if (prefix == ">From ")      //XXX RFC2646
      break;

    level++;
    logLineStart = cursor + 1;
  }

  return level;
}

// Converts plain text to HTML.
//   text     - the plain text (not HTML-escaped)
//   whattodo - bit mask of kURLs, kGlyphSubstitution and kStructPhrase
//              selecting the optional conversions
// Returns the HTML: '<', '>' and '&' are always escaped; depending on
// |whattodo|, glyphs are substituted, *structured phrases* are wrapped in
// tags and URLs are turned into links.
//
// The text is processed left to right. At each position the enabled
// conversions are tried in the order glyph, structured phrase, URL; the
// first that hits consumes its input and the scan resumes behind it.
nsAutoString
mozTXTToHTMLConv::ScanTXT(const nsAutoString& text, PRUint32 whattodo)
{
  PRBool doURLs = whattodo & kURLs;
  PRBool doGlyphSubstitution = whattodo & kGlyphSubstitution;
  PRBool doStructPhrase = whattodo & kStructPhrase;

#ifdef DEBUG_BenB
  {
    // Never pass data as the printf format string, and free the copy.
    char* debugText = text.ToNewCString();
    if (debugText)
    {
      printf("ScanTXT orginal: %s", debugText);
      Recycle(debugText);
    }
  }
#endif

  MOZ_TIMER_START(mScanTXTTimer);

  nsAutoString result;

  PRUint32 structPhrase_strong = 0;  // Number of currently open tags
  PRUint32 structPhrase_underline = 0;
  PRUint32 structPhrase_italic = 0;
  PRUint32 structPhrase_code = 0;

  for(PRUint32 i = 0; PRInt32(i) < text.Length();)
  {
    if (doGlyphSubstitution)
    {
      PRInt32 glyphTextLen;
      nsAutoString glyphHTML;
      if (GlyphHit(Right(text, i), i == 0, glyphHTML, glyphTextLen))
      {
        result += glyphHTML;
        i += glyphTextLen;
        continue;
      }
    }

    if (doStructPhrase)
    {
      // Select the tag description for the marker at this position. Each
      // marker is tested against its own tag only; the cases do not fall
      // through into one another.
      const char* tagTXT = nsnull;
      const char* tagHTML = nsnull;
      const char* attributeHTML = nsnull;
      PRUint32* openTags = nsnull;
      switch (text[i]) // Performance increase
      {
      case '*':
        tagTXT = "*";
        tagHTML = "strong";
        attributeHTML = "class=txt_star";
        openTags = &structPhrase_strong;
        break;
      case '_':
        tagTXT = "_";
        tagHTML = "em"; /* <u> is deprecated */
        attributeHTML = "class=txt_underscore";
        openTags = &structPhrase_underline;
        break;
      case '/':
        tagTXT = "/";
        tagHTML = "em";
        attributeHTML = "class=txt_slash";
        openTags = &structPhrase_italic;
        break;
      case '|':
        tagTXT = "|";
        tagHTML = "code";
        attributeHTML = "class=txt_verticalline";
        openTags = &structPhrase_code;
        break;
      default:
        break;
      }

      if (tagTXT)
      {
        nsAutoString HTMLnsStr;
        // Away from the start, hand over one character of left context.
        if (StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0,
                 tagTXT, tagHTML, attributeHTML,
                 HTMLnsStr, *openTags))
        {
          result += HTMLnsStr;
          i++;
          continue;
        }
      }
    }

    if (doURLs)
    {
      switch (text[i])
      {
      case ':':
      case '@':
      case '.':
        // Performance increase: a trigger next to a space cannot be part
        // of a URL. A neighbour that does not exist (start / end of the
        // text) is treated as "not a space" without indexing outside the
        // string.
        if ((i == 0 || text[i - 1] != ' ')
            && (PRInt32(i + 1) >= text.Length() || text[i + 1] != ' '))
        {
          nsAutoString outputHTML;
          PRInt32 replaceBefore;
          PRInt32 replaceAfter;
          if (FindURL(text, i, whattodo,
                      outputHTML, replaceBefore, replaceAfter)
                  && !(text[i] == '@' && (   // workaround for bug #19445
                    structPhrase_strong + structPhrase_italic +   // ditto
                    structPhrase_underline + structPhrase_code != 0  )))
          {
            // Drop the part of the URL that was already written out, then
            // append the link and skip the rest of the URL in the input.
            nsAutoString temp;
            result.Left(temp, result.Length() - replaceBefore);
            result = temp;
            result += outputHTML;
            i += replaceAfter + 1;
            continue;
          }
        }
        break;
      } //switch
    }

    switch (text[i])
    {
    // Special symbols
    case '<':
    case '>':
    case '&':
      result += EscapeChar(text[i]);
      i++;
      break;
    // Normal characters
    default:
      result += text[i];
      i++;
    }
  }

#ifdef DEBUG_BenB
  {
    char* debugResult = result.ToNewCString();
    if (debugResult)
    {
      printf("ScanTXT result:  %s\n", debugResult);
      Recycle(debugResult);
    }
  }
#endif

  MOZ_TIMER_STOP(mScanTXTTimer);

  return result;
}

// Applies the ScanTXT() conversions to the text portions of an HTML
// fragment.
//   text     - HTML source
//   whattodo - conversion flags, passed through to ScanTXT()
// Returns the HTML with its text runs converted.
//
// Tags ("<...>") and complete anchor elements ("<a ...>...</a>") are copied
// through untouched. Everything between tags is unescaped, passed to
// ScanTXT() and the result appended.
nsAutoString
mozTXTToHTMLConv::ScanHTML(const nsAutoString& text, PRUint32 whattodo)
{
  nsAutoString result;

#ifdef DEBUG_BenB
  {
    // Never pass data as the printf format string, and free the copy.
    char* debugText = text.ToNewCString();
    if (debugText)
    {
      printf("ScanHTML orginal: %s\n", debugText);
      Recycle(debugText);
    }
  }
#endif

  const PRUint32 textLen = PRUint32(text.Length());

  // Look for simple entities not included in a tags and scan them.
  /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>").
     Unescape the rest (text between tags) and pass it to ScanTXT. */
  for (PRUint32 i = 0; i < textLen;)
  {
    if (text[i] == '<')  // html tag
    {
      PRUint32 start = i;
      nsAutoString temp;
      // Only a real anchor counts: 'a' followed by whitespace or '>'.
      // Testing the first letter alone would also catch <abbr>, <address>
      // etc. and swallow everything up to some later "</a>".
      if (i + 2 < textLen
          && nsCRT::ToLower(text[i + 1]) == 'a'
          && (nsString::IsSpace(text[i + 2]) || text[i + 2] == '>'))
           // if a tag, skip until </a>
      {
        for (; i + 3 < textLen
                 && (text.Mid(temp, i, 4), temp.ToLowerCase(), temp != "</a>")
             ; i++)
          ;
        i += 4;
      }
      else  // just skip tag (attributes etc.)
      {
        for (; i < textLen && text[i] != '>'; i++)
          ;
        i++;
      }
      // An unterminated tag/anchor leaves i behind the end of the text.
      if (i > textLen)
        i = textLen;
      text.Mid(temp, start, i - start);  // i is one char after the tag
      result += temp;
    }
    else
    {
      PRUint32 start = i;
      for (; i < textLen && text[i] != '<'; i++)
        ;
      nsAutoString temp;
      text.Mid(temp, start, i - start);  // i is first char of the tag
      result += ScanTXT(UnescapeStr(temp), whattodo);
    }
  }

#ifdef DEBUG_BenB
  {
    char* debugResult = result.ToNewCString();
    if (debugResult)
    {
      printf("ScanHTML result:  %s\n", debugResult);
      Recycle(debugResult);
    }
  }
#endif

  return result;
}

/****************************************************************************
  XPCOM Interface
*****************************************************************************/

// Stream-to-stream conversion entry point. This converter does not
// implement it; all arguments are ignored.
NS_IMETHODIMP
mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream,
                          const PRUnichar *aFromType,
                          const PRUnichar *aToType,
                          nsISupports *aCtxt,
                          nsIInputStream **_retval)
{
  const nsresult notSupported = NS_ERROR_NOT_IMPLEMENTED;
  return notSupported;
}

// Asynchronous conversion entry point. This converter does not implement
// it; all arguments are ignored.
NS_IMETHODIMP
mozTXTToHTMLConv::AsyncConvertData(const PRUnichar *aFromType,
                                   const PRUnichar *aToType,
                                   nsIStreamListener *aListener,
                                   nsISupports *aCtxt)
{
  const nsresult notSupported = NS_ERROR_NOT_IMPLEMENTED;
  return notSupported;
}

// Stream-listener data callback. This converter does not implement it; all
// arguments are ignored.
NS_IMETHODIMP
mozTXTToHTMLConv::OnDataAvailable(nsIChannel *channel,
                                  nsISupports *ctxt,
                                  nsIInputStream *inStr,
                                  PRUint32 sourceOffset,
                                  PRUint32 count)
{
  const nsresult notSupported = NS_ERROR_NOT_IMPLEMENTED;
  return notSupported;
}

// Stream-listener start callback. This converter does not implement it;
// all arguments are ignored.
NS_IMETHODIMP
mozTXTToHTMLConv::OnStartRequest(nsIChannel *channel,
                                 nsISupports *ctxt)
{
  const nsresult notSupported = NS_ERROR_NOT_IMPLEMENTED;
  return notSupported;
}

// Stream-listener stop callback. This converter does not implement it; all
// arguments are ignored.
NS_IMETHODIMP
mozTXTToHTMLConv::OnStopRequest(nsIChannel *channel,
                                nsISupports *ctxt,
                                nsresult status,
                                const PRUnichar *errorMsg)
{
  const nsresult notSupported = NS_ERROR_NOT_IMPLEMENTED;
  return notSupported;
}

// XPCOM wrapper around the internal CiteLevelTXT().
//   line         - the line to inspect
//   logLineStart - in/out: scan start index / index behind the quote marks
//   _retval      - out: number of quote levels
// Returns NS_ERROR_NULL_POINTER if any pointer argument is null, NS_OK
// otherwise.
NS_IMETHODIMP
mozTXTToHTMLConv::CiteLevelTXT(const PRUnichar *line, PRUint32 *logLineStart,
                               PRUint32 *_retval)
{
  if (!logLineStart)
    return NS_ERROR_NULL_POINTER;
  if (!_retval)
    return NS_ERROR_NULL_POINTER;
  if (!line)
    return NS_ERROR_NULL_POINTER;

  *_retval = CiteLevelTXT(line, *logLineStart);
  return NS_OK;
}

// XPCOM wrapper around the internal ScanTXT().
//   text     - plain text to convert
//   whattodo - conversion flags
//   _retval  - out: newly allocated buffer holding the resulting HTML
// Returns NS_ERROR_NULL_POINTER for a null argument,
// NS_ERROR_OUT_OF_MEMORY if the result buffer could not be allocated, and
// NS_OK otherwise.
NS_IMETHODIMP
mozTXTToHTMLConv::ScanTXT(const PRUnichar *text, PRUint32 whattodo,
                          PRUnichar **_retval)
{
  if (!_retval)
    return NS_ERROR_NULL_POINTER;
  if (!text)
    return NS_ERROR_NULL_POINTER;

  *_retval = ScanTXT(text, whattodo).ToNewUnicode();
  if (*_retval)
    return NS_OK;
  return NS_ERROR_OUT_OF_MEMORY;
}

// XPCOM wrapper around the internal ScanHTML().
//   text     - HTML source to process
//   whattodo - conversion flags
//   _retval  - out: newly allocated buffer holding the resulting HTML
// Returns NS_ERROR_NULL_POINTER for a null argument,
// NS_ERROR_OUT_OF_MEMORY if the result buffer could not be allocated, and
// NS_OK otherwise.
NS_IMETHODIMP
mozTXTToHTMLConv::ScanHTML(const PRUnichar *text, PRUint32 whattodo,
                           PRUnichar **_retval)
{
  if (!_retval)
    return NS_ERROR_NULL_POINTER;
  if (!text)
    return NS_ERROR_NULL_POINTER;

  *_retval = ScanHTML(text, whattodo).ToNewUnicode();
  if (*_retval)
    return NS_OK;
  return NS_ERROR_OUT_OF_MEMORY;
}


/**************************************************************************
  Global functions
***************************************************************************/
// Factory: creates a mozTXTToHTMLConv and hands it out with one reference
// already added.
//   aConv - out: receives the new object; must not be null
// Returns NS_ERROR_NULL_POINTER if |aConv| is null, NS_ERROR_OUT_OF_MEMORY
// if the object could not be created, and NS_OK otherwise.
nsresult
MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
{
    NS_PRECONDITION(aConv != nsnull, "null ptr");
    if (aConv == nsnull)
    {
      return NS_ERROR_NULL_POINTER;
    }

    *aConv = new mozTXTToHTMLConv();
    if (*aConv == nsnull)
    {
      return NS_ERROR_OUT_OF_MEMORY;
    }

    // The caller owns the reference taken here.
    NS_ADDREF(*aConv);
    //    return (*aConv)->Init();
    return NS_OK;
}