pjs/string/obsolete/nsString2.h

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: NPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Rick Gessner <rickg@netscape.com> (original author)
 *   Scott Collins <scc@mozilla.org>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the NPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the NPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

/*
 * nsString2.h --- rickg's original strings of 2-byte chars, |nsString|
 * and |nsAutoString|; these classes will be replaced by the new
 * shared-buffer string (see bug #53065)
 */

#ifndef nsString2_h__
#define nsString2_h__

#include "prtypes.h"
#include "nscore.h"
#include <stdio.h>

#ifndef nsAString_h__
#include "nsAString.h"
#endif

#ifndef nsAFlatString_h___
#include "nsAFlatString.h"
#endif

#ifndef nsLiteralString_h__
#include "nsLiteralString.h"
#endif

#ifndef nsDependentSubstring_h__
#include "nsDependentSubstring.h"
#endif

#ifndef nsPromiseFlatString_h__
#include "nsPromiseFlatString.h"
#endif

#ifndef nsXPIDLString_h__
#include "nsXPIDLString.h"
#endif

#include "nsStr.h"

class UTF8traits
  {
    public:
      static PRBool isASCII(char c) { return (c & 0x80) == 0x00; }
      static PRBool isInSeq(char c) { return (c & 0xC0) == 0x80; }
      static PRBool is2byte(char c) { return (c & 0xE0) == 0xC0; }
      static PRBool is3byte(char c) { return (c & 0xF0) == 0xE0; }
      static PRBool is4byte(char c) { return (c & 0xF8) == 0xF0; }
      static PRBool is5byte(char c) { return (c & 0xFC) == 0xF8; }
      static PRBool is6byte(char c) { return (c & 0xFE) == 0xFC; }
  };

#ifdef STANDALONE_MI_STRING_TESTS
  class nsAFlatString { public: virtual ~nsAString() { } };
#endif

class nsISizeOfHandler;
class nsCString;


class NS_COM nsString :
  public nsAFlatString,
  public nsStr {

public:
  friend class nsCString;
  friend class nsLinebreakConverter;
  friend void ToLowerCase( nsString& );
  friend void ToUpperCase( nsString& );

protected:
  virtual const nsBufferHandle<PRUnichar>* GetFlatBufferHandle() const;
  virtual const PRUnichar* GetReadableFragment( nsReadableFragment<PRUnichar>&, nsFragmentRequest, PRUint32 ) const;
  virtual PRUnichar* GetWritableFragment( nsWritableFragment<PRUnichar>&, nsFragmentRequest, PRUint32 );

public:
  virtual const PRUnichar* get() const;

public:
  /**
   * Default constructor.
   */
  nsString();

  /**
   * This is our copy constructor
   * @param   reference to another nsString
   */
  nsString(const nsString& aString);

  explicit nsString(const nsAString&);

  explicit nsString(const PRUnichar*);
  nsString(const PRUnichar*, PRInt32);


  /**
   * Destructor
   *
   */
  virtual ~nsString();

  /**
   * Retrieve the length of this string
   * @return string length
   */
  virtual PRUint32 Length() const { return mLength; }

  /**
   * Call this method if you want to force a different string length
   * @update  gess7/30/98
   * @param   aLength -- contains new length for mStr
   * @return
   */
  void SetLength(PRUint32 aLength);

  /**
   * Sets the new length of the string.
   * @param   aLength is new string length.
   * @return  nada
   */
  void SetCapacity(PRUint32 aLength);


  /**********************************************************************
    Getters/Setters...
   *********************************************************************/

   /**
     * Set nth character.
     */
  PRBool SetCharAt(PRUnichar aChar,PRUint32 anIndex);


  /**********************************************************************
    Lexomorphic transforms...
   *********************************************************************/


  /**
   *  This method is used to remove all occurances of the
   *  characters found in aSet from this string.
   *
   *  @param  aSet -- characters to be cut from this
   *  @return *this
   */
  void StripChars( const char* aSet );
  void StripChar( PRUnichar aChar, PRInt32 anOffset=0 );
  /**
   *  This method strips whitespace throughout the string
   *
   *  @return  this
   */
  void StripWhitespace();

  /**
   *  swaps occurence of 1 string for another
   *
   *  @return  this
   */
  void ReplaceChar( PRUnichar anOldChar, PRUnichar aNewChar );
  void ReplaceChar( const char* aSet, PRUnichar aNewChar );

  void ReplaceSubstring( const nsString& aTarget, const nsString& aNewValue );
  void ReplaceSubstring( const PRUnichar* aTarget, const PRUnichar* aNewValue );

  /**
   *  This method trims characters found in aTrimSet from
   *  either end of the underlying string.
   *
   *  @param   aTrimSet -- contains chars to be trimmed from
   *           both ends
   *  @param   aEliminateLeading
   *  @param   aEliminateTrailing
   *  @param   aIgnoreQuotes
   *  @return  this
   */
  void Trim(const char* aSet,PRBool aEliminateLeading=PR_TRUE,PRBool aEliminateTrailing=PR_TRUE,PRBool aIgnoreQuotes=PR_FALSE);

  /**
   *  This method strips whitespace from string.
   *  You can control whether whitespace is yanked from
   *  start and end of string as well.
   *
   *  @param   aEliminateLeading controls stripping of leading ws
   *  @param   aEliminateTrailing controls stripping of trailing ws
   *  @return  this
   */
  void CompressWhitespace( PRBool aEliminateLeading=PR_TRUE,PRBool aEliminateTrailing=PR_TRUE);

  /**********************************************************************
    string conversion methods...
   *********************************************************************/

  /**
   * Copies data from internal buffer onto given char* buffer
   * NOTE: This only copies as many chars as will fit in given buffer (clips)
   * @param aBuf is the buffer where data is stored
   * @param aBuflength is the max # of chars to move to buffer
   * @return ptr to given buffer
   */
  char* ToCString(char* aBuf,PRUint32 aBufLength,PRUint32 anOffset=0) const;

  /**
   * Perform string to float conversion.
   * @param   aErrorCode will contain error if one occurs
   * @return  float rep of string value
   */
  float ToFloat(PRInt32* aErrorCode) const;

  /**
   * Perform string to int conversion.
   * @param   aErrorCode will contain error if one occurs
   * @param   aRadix tells us which radix to assume; kAutoDetect tells us to determine the radix for you.
   * @return  int rep of string value, and possible (out) error code
   */
  PRInt32   ToInteger(PRInt32* aErrorCode,PRUint32 aRadix=kRadix10) const;


  /**********************************************************************
    String manipulation methods...
   *********************************************************************/

  /**
   * assign given string to this string
   * @param   aStr: buffer to be assigned to this
   * @param   aCount is the length of the given str (or -1) if you want me to determine its length
   *  NOTE:   IFF you pass -1 as aCount, then your buffer must be null terminated.

   * @return  this
   */

  nsString& operator=( const nsString& aString )                              { Assign(aString); return *this; }
  nsString& operator=( const nsAString& aReadable )                           { Assign(aReadable); return *this; }
//nsString& operator=( const nsPromiseReadable<PRUnichar>& aReadable )        { Assign(aReadable); return *this; }
  nsString& operator=( const PRUnichar* aPtr )                                { Assign(aPtr); return *this; }
  nsString& operator=( PRUnichar aChar )                                      { Assign(aChar); return *this; }

  void AssignWithConversion(const char*);
  void AssignWithConversion(const char*, PRInt32);


  /*
   *  Appends n characters from given string to this,
   *  This version computes the length of your given string
   *
   *  @param   aString is the source to be appended to this
   *  @return  number of chars copied
   */

  void AppendInt(PRInt32, PRInt32=10); //radix=8,10 or 16
  void AppendFloat(double);
  void AppendWithConversion(const char*, PRInt32=-1);

  virtual void do_AppendFromElement( PRUnichar );


//void InsertWithConversion(char);
  void InsertWithConversion(const char*, PRUint32, PRInt32=-1);

  // Takes ownership of aPtr, sets the current length to aLength if specified.
  void Adopt( PRUnichar* aPtr, PRInt32 aLength = -1 );

  /*
    |Left|, |Mid|, and |Right| are annoying signatures that seem better almost
    any _other_ way than they are now.  Consider these alternatives

    aWritable = aReadable.Left(17);   // ...a member function that returns a |Substring|
    aWritable = Left(aReadable, 17);  // ...a global function that returns a |Substring|
    Left(aReadable, 17, aWritable);   // ...a global function that does the assignment

    as opposed to the current signature

    aReadable.Left(aWritable, 17);    // ...a member function that does the assignment

    or maybe just stamping them out in favor of |Substring|, they are just duplicate functionality

    aWritable = Substring(aReadable, 0, 17);
  */

  size_type  Left( self_type&, size_type ) const;
  size_type  Mid( self_type&, PRUint32, PRUint32 ) const;
  size_type  Right( self_type&, size_type ) const;

  /**********************************************************************
    Searching methods...
   *********************************************************************/
   /**
   *  Search for given character within this string
   *
   *  @param   aChar is the character to search for
   *  @param   anOffset tells us where in this string to start searching
               (optional parameter)
   *  @param   aCount tells us how far from the offset we are to search. Use
               -1 to search the whole string. (optional parameter)
   *  @return  offset in string, or -1 (kNotFound)
   */
  PRInt32 FindChar(PRUnichar aChar, PRInt32 anOffset=0, PRInt32 aCount=-1) const;

  /**
   *  Search for given substring within this string
   *
   *  @param   aString is substring to be sought in this
   *  @param   aIgnoreCase selects case sensitivity
   *  @param   anOffset tells us where in this string to start searching
   *  @param   aCount tells us how far from the offset we are to search. Use
               -1 to search the whole string.
   *  @return  offset in string, or -1 (kNotFound)
   */
  PRInt32 Find(const nsCString& aString,PRBool aIgnoreCase=PR_FALSE,PRInt32 anOffset=0,PRInt32 aCount=-1) const;
  PRInt32 Find(const char* aString,PRBool aIgnoreCase=PR_FALSE,PRInt32 anOffset=0,PRInt32 aCount=-1) const;

  PRInt32 Find(const nsAFlatString& aString, PRInt32 anOffset=0, PRInt32 aCount=-1) const;

  PRInt32 Find(const PRUnichar* aString, PRInt32 anOffset=0, PRInt32 aCount=-1) const;

  /**
   * This method searches this string for the first character
   * found in the given charset
   * @param aString contains set of chars to be found
   * @param anOffset tells us where to start searching in this
   * @return -1 if not found, else the offset in this
   */
  PRInt32 FindCharInSet(const char* aString,PRInt32 anOffset=0) const;
  PRInt32 FindCharInSet(const PRUnichar* aString,PRInt32 anOffset=0) const;

  /**
   * This methods scans the string backwards, looking for the given string
   * @param   aString is substring to be sought in this
   * @param   aIgnoreCase tells us whether or not to do caseless compare
   * @param   anOffset tells us where in this strig to start searching (counting from left)
   * @param   aCount tells us how many iterations to make starting at the given offset
   * @return  offset in string, or -1 (kNotFound)
   */
  PRInt32 RFind(const char* aCString,PRBool aIgnoreCase=PR_FALSE,PRInt32 anOffset=-1,PRInt32 aCount=-1) const;

  PRInt32 RFind(const nsAFlatString& aString, PRInt32 anOffset=-1,PRInt32 aCount=-1) const;
  PRInt32 RFind(const PRUnichar* aString,PRInt32 anOffset=-1,PRInt32 aCount=-1) const;


  /**
   *  Search for given char within this string
   *
   *  @param   aString is substring to be sought in this
   *  @param   anOffset tells us where in this strig to start searching (counting from left)
   *  @param   aIgnoreCase selects case sensitivity
   *  @param   aCount tells us how many iterations to make starting at the given offset
   *  @return  find pos in string, or -1 (kNotFound)
   */
  PRInt32 RFindChar(PRUnichar aChar,PRInt32 anOffset=-1,PRInt32 aCount=-1) const;

  /**
   * This method searches this string for the last character
   * found in the given string
   * @param aString contains set of chars to be found
   *  @param   anOffset tells us where in this strig to start searching (counting from left)
   * @return -1 if not found, else the offset in this
   */
  PRInt32 RFindCharInSet(const PRUnichar* aString,PRInt32 anOffset=-1) const;


  /**********************************************************************
    Comparison methods...
   *********************************************************************/

  /**
   * Compares a given string type to this string.
   * @update  gess 7/27/98
   * @param   S is the string to be compared
   * @param   aIgnoreCase tells us how to treat case
   * @param   aCount tells us how many chars to compare
   * @return  -1,0,1
   */

  PRInt32 CompareWithConversion(const char* aString, PRBool aIgnoreCase=PR_FALSE, PRInt32 aCount=-1) const;

  PRBool  EqualsWithConversion(const char* aString,PRBool aIgnoreCase=PR_FALSE,PRInt32 aCount=-1) const;

  PRBool  EqualsIgnoreCase(const char* aString,PRInt32 aCount=-1) const;

  /**
   *  Determine if given buffer is plain ascii
   *
   *  @param   aBuffer -- if null, then we test *this, otherwise we test given buffer
   *  @return  TRUE if is all ascii chars or if strlen==0
   */
  PRBool IsASCII(const PRUnichar* aBuffer=0);

  /**
   *  Determine if given char is a valid space character
   *
   *  @param   aChar is character to be tested
   *  @return  TRUE if is valid space char
   */
  static  PRBool IsSpace(PRUnichar ch);

#ifdef DEBUG
  /**
   * Retrieve the size of this string
   * @return string length
   */
  virtual void SizeOf(nsISizeOfHandler* aHandler, PRUint32* aResult) const;
#endif

private:
    // NOT TO BE IMPLEMENTED
    //  these signatures help clients not accidentally call the wrong thing helped by C++ automatic integral promotion
  void operator=( char );
  void AssignWithConversion( const PRUnichar*, PRInt32=-1 );
  void AppendWithConversion( const PRUnichar*, PRInt32=-1 );
  void InsertWithConversion( const PRUnichar*, PRUint32, PRInt32=-1 );
};

inline
nsString::size_type
nsString::Left( nsAString& aResult, size_type aLengthToCopy ) const
  {
    return Mid(aResult, 0, aLengthToCopy);
  }

inline
nsString::size_type
nsString::Right( self_type& aResult, size_type aLengthToCopy ) const
  {
    size_type myLength = Length();
    aLengthToCopy = NS_MIN(myLength, aLengthToCopy);
    return Mid(aResult, myLength-aLengthToCopy, aLengthToCopy);
  }

// NS_DEF_STRING_COMPARISON_OPERATORS(nsString, PRUnichar)
// NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsString, PRUnichar)

/**************************************************************
  Here comes the AutoString class which uses internal memory
  (typically found on the stack) for its default buffer.
  If the buffer needs to grow, it gets reallocated on the heap.
 **************************************************************/

class NS_COM nsAutoString : public nsString {
public:

    virtual ~nsAutoString() {}

    nsAutoString();
    nsAutoString(const nsAutoString& aString);
    explicit nsAutoString(const nsAString& aString);
    explicit nsAutoString(const nsString& aString);
    explicit nsAutoString(const PRUnichar* aString);
    nsAutoString(const PRUnichar* aString,PRInt32 aLength);
    explicit nsAutoString(PRUnichar aChar);
    explicit nsAutoString(const CBufDescriptor& aBuffer);

    nsAutoString& operator=( const nsAutoString& aString )                          { Assign(aString); return *this; }
  private:
    void operator=( char ); // NOT TO BE IMPLEMENTED
  public:
    nsAutoString& operator=( const nsAString& aReadable )                           { Assign(aReadable); return *this; }
//  nsAutoString& operator=( const nsPromiseReadable<PRUnichar>& aReadable )        { Assign(aReadable); return *this; }
    nsAutoString& operator=( const PRUnichar* aPtr )                                { Assign(aPtr); return *this; }
    nsAutoString& operator=( PRUnichar aChar )                                      { Assign(aChar); return *this; }

#ifdef DEBUG
    /**
     * Retrieve the size of this string
     * @return string length
     */
    virtual void SizeOf(nsISizeOfHandler* aHandler, PRUint32* aResult) const;
#endif

    PRUnichar mBuffer[kDefaultStringSize];
};

// NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsAutoString, PRUnichar)

class NS_COM NS_ConvertASCIItoUCS2
      : public nsAutoString
    /*
      ...
    */
  {
    public:
      explicit
      NS_ConvertASCIItoUCS2( const nsACString& aCString );

      explicit
      NS_ConvertASCIItoUCS2( const nsAFlatCString& aCString )
        {
          Init( aCString.get(), aCString.Length() );
        }

      explicit
      NS_ConvertASCIItoUCS2( const char* aCString )
        {
          Init( aCString, ~PRUint32(0) /* MAXINT */ );
        }

      NS_ConvertASCIItoUCS2( const char* aCString, PRUint32 aLength )
        {
          Init( aCString, aLength );
        }

#if 0
      operator const nsDependentString() const
        {
          return nsDependentString(mUStr, mLength);
        }
#endif

    protected:
      void Init( const char* aCString, PRUint32 aLength );

    private:
        // NOT TO BE IMPLEMENTED
      NS_ConvertASCIItoUCS2( PRUnichar );
  };

class NS_COM NS_ConvertUTF8toUCS2
      : public nsAutoString
  {
    public:
      explicit
      NS_ConvertUTF8toUCS2( const nsACString& aCString )
        {
          Init( aCString );
        }

      explicit
      NS_ConvertUTF8toUCS2( const char* aCString )
        {
          Init( nsDependentCString( aCString ) );
        }

      NS_ConvertUTF8toUCS2( const char* aCString, PRUint32 aLength )
        {
          Init( Substring( aCString, aCString + aLength ) );
        }

    protected:
      void Init( const nsACString& aCString );

    private:
      NS_ConvertUTF8toUCS2( PRUnichar );
  };

#define PLANE1_BASE           0x00010000
#define UCS2_REPLACEMENT_CHAR 0xfffd

class ConvertUTF8toUCS2
  {
    public:
      typedef nsACString::char_type value_type;
      typedef nsAString::char_type  buffer_type;

    ConvertUTF8toUCS2( buffer_type* aBuffer )
        : mStart(aBuffer), mBuffer(aBuffer), mErrorEncountered(PR_FALSE) {}

    size_t Length() const { return mBuffer - mStart; }

    PRUint32 write( const value_type* start, PRUint32 N )
      {
        if ( mErrorEncountered )
          return N;

        // algorithm assumes utf8 units won't
        // be spread across fragments
        const value_type* p = start;
        const value_type* end = start + N;
        for ( ; p != end /* && *p */; )
          {
            char c = *p++;

            if ( UTF8traits::isASCII(c) )
              {
                *mBuffer++ = buffer_type(c);
                continue;
              }

            PRUint32 ucs4;
            PRUint32 minUcs4;
            PRInt32 state = 0;

            if ( UTF8traits::is2byte(c) )
              {
                ucs4 = (PRUint32(c) << 6) & 0x000007C0L;
                state = 1;
                minUcs4 = 0x00000080;
              }
            else if ( UTF8traits::is3byte(c) )
              {
                ucs4 = (PRUint32(c) << 12) & 0x0000F000L;
                state = 2;
                minUcs4 = 0x00000800;
              }
            else if ( UTF8traits::is4byte(c) )
              {
                ucs4 = (PRUint32(c) << 18) & 0x001F0000L;
                state = 3;
                minUcs4 = 0x00010000;
              }
            else if ( UTF8traits::is5byte(c) )
              {
                ucs4 = (PRUint32(c) << 24) & 0x03000000L;
                state = 4;
                minUcs4 = 0x00200000;
              }
            else if ( UTF8traits::is6byte(c) )
              {
                ucs4 = (PRUint32(c) << 30) & 0x40000000L;
                state = 5;
                minUcs4 = 0x04000000;
              }
            else
              {
                NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings.");
                mErrorEncountered = PR_TRUE;
                return N;
              }

            while ( state-- )
              {
                c = *p++;

                if ( UTF8traits::isInSeq(c) )
                  {
                    PRInt32 shift = state * 6;
                    ucs4 |= (PRUint32(c) & 0x3F) << shift;
                  }
                else
                  {
                    NS_ERROR("not a UTF8 string");
                    mErrorEncountered = PR_TRUE;
                    return N;
                  }
              }

            if ( ucs4 < minUcs4 )
              {
                // Overlong sequence
                *mBuffer++ = UCS2_REPLACEMENT_CHAR;
              }
            else if ( ucs4 <= 0xD7FF )
              {
                *mBuffer++ = ucs4;
              }
            else if ( /* ucs4 >= 0xD800 && */ ucs4 <= 0xDFFF )
              {
                // Surrogates
                *mBuffer++ = UCS2_REPLACEMENT_CHAR;
              }
            else if ( ucs4 == 0xFFFE || ucs4 == 0xFFFF )
              {
                // Prohibited characters
                *mBuffer++ = UCS2_REPLACEMENT_CHAR;
              }
            else if ( ucs4 >= PLANE1_BASE )
              {
                if ( ucs4 >= 0x00110000 )
                  *mBuffer++ = UCS2_REPLACEMENT_CHAR;
                else {
                  // surrogate, see unicode specification 3.7 for following math.
                  ucs4 -= PLANE1_BASE;
                  *mBuffer++ = (PRUnichar)(ucs4 >> 10) | 0xd800u;
                  *mBuffer++ = (PRUnichar)(ucs4 & 0x3ff) | 0xdc00u;
                }
              }
            else
              {
                if ( ucs4 != 0xFEFF ) // ignore BOM
                    *mBuffer++ = ucs4;
              }
          }
        return p - start;
      }

    private:
      buffer_type* mStart;
      buffer_type* mBuffer;
      PRBool mErrorEncountered;
  };

#endif /* !defined(nsString2_h__) */