/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s):
 *   Scott Collins
 */

#ifndef _nsAReadableString_h__
#define _nsAReadableString_h__

  // WORK IN PROGRESS

#include "nscore.h"
  // for |PRUnichar|

#include <algorithm>
  // for |std::min|, |std::count|, |std::copy|

#include <iterator>
  // for |bidirectional_iterator_tag|, |ostream_iterator|

#include <ostream>
  // for |basic_ostream|

#include <string>
  // for |char_traits|

/*
  This file defines the abstract interfaces |nsAReadableString| and
  |nsAReadableCString| (the 'A' is for 'abstract', as opposed to the 'I' in
  [XP]COM interface names).

  These types are intended to be as source compatible as possible with the
  original definitions of |const nsString&| and |const nsCString&|,
  respectively. In otherwords, these interfaces provide only non-mutating
  access to the underlying strings. We split the these interfaces out from the
  mutating parts (see "nsAWritableString.h") because tests showed that we
  could exploit specialized implementations in some areas; we need an abstract
  interface to bring the whole family of strings together.

  |nsAReadableString| is a string of |PRUnichar|s. |nsAReadableCString| (note
  the 'C') is a string of |char|s.
*/ template class basic_nsAWritableString; // ...because we sometimes use them as `out' params template class basic_nsLiteralString; // ...because we sometimes use them as in params to force the conversion of |CharT*|s // // nsAReadable[C]String // template class basic_nsAReadableString /* ... */ { protected: struct ConstFragment { const CharT* mStart; const CharT* mEnd; PRUint32 mFragmentIdentifier; ConstFragment() : mStart(0), mEnd(0), mFragmentIdentifier(0) { // nothing else to do here } }; public: enum FragmentRequest { kPrevFragment, kFirstFragment, kLastFragment, kNextFragment, kFragmentAt }; // Damn! Had to make |GetConstFragment| and |Implementation| public because the compilers suck. Should be protected. virtual const char* Implementation() const; virtual const CharT* GetConstFragment( ConstFragment&, FragmentRequest, PRUint32 = 0 ) const = 0; friend class ConstIterator; class ConstIterator : public std::bidirectional_iterator_tag { public: typedef ptrdiff_t difference_type; typedef CharT value_type; typedef const CharT* pointer; typedef const CharT& reference; typedef bidirectional_iterator_tag iterator_category; private: friend class basic_nsAReadableString; ConstFragment mFragment; const CharT* mPosition; const basic_nsAReadableString* mOwningString; void normalize_forward() { if ( mPosition == mFragment.mEnd ) if ( mOwningString->GetConstFragment(mFragment, kNextFragment) ) mPosition = mFragment.mStart; } void normalize_backward() { if ( mPosition == mFragment.mStart ) if ( mOwningString->GetConstFragment(mFragment, kPrevFragment) ) mPosition = mFragment.mEnd; } ConstIterator( const ConstFragment& aFragment, const CharT* aStartingPosition, const basic_nsAReadableString& aOwningString ) : mFragment(aFragment), mPosition(aStartingPosition), mOwningString(&aOwningString) { // nothing else to do here } public: // ConstIterator( const ConstIterator& ); ...use default copy-constructor // ConstIterator& operator=( const ConstIterator& ); ...use default 
copy-assignment operator CharT operator*() { return *mPosition; } ConstIterator& operator++() { ++mPosition; normalize_forward(); return *this; } ConstIterator operator++( int ) { ConstIterator result(*this); ++mPosition; normalize_forward(); return result; } ConstIterator& operator--() { normalize_backward(); --mPosition; return *this; } ConstIterator operator--( int ) { ConstIterator result(*this); normalize_backward(); --mPosition; return result; } // Damn again! Problems with templates made me implement comparisons as members. PRBool operator==( const ConstIterator& rhs ) { return mPosition == rhs.mPosition; } PRBool operator!=( const ConstIterator& rhs ) { return mPosition != rhs.mPosition; } }; public: ConstIterator Begin( PRUint32 aOffset = 0 ) const { ConstFragment fragment; const CharT* startPos = GetConstFragment(fragment, kFragmentAt, aOffset); return ConstIterator(fragment, startPos, *this); } ConstIterator End( PRUint32 aOffset = 0 ) const { ConstFragment fragment; const CharT* startPos = GetConstFragment(fragment, kFragmentAt, max(0U, Length()-aOffset)); return ConstIterator(fragment, startPos, *this); } public: virtual ~basic_nsAReadableString() { } // ...yes, I expect to be sub-classed. virtual PRUint32 Length() const = 0; PRBool IsEmpty() const { return Length() == 0; } /* RickG says the following three routines, |IsUnicode()|, |GetBuffer()|, and |GetUnicode()| shouldn't be implemented because they're wrong access. I agree. Callers who really need this access should use the iterators instead. We'll use these to ease the transition to |nsAReadable...|, and then remove them as soon as possible. 
*/ PRBool IsUnicode() const { return PR_FALSE; } // ...but note specialization for |PRUnichar|, below const char* GetBuffer() const { return 0; } const PRUnichar* GetUnicode() const { return 0; } // ...but note specializations for |char| and |PRUnichar|, below CharT CharAt( PRUint32 ) const; CharT operator[]( PRUint32 ) const; CharT First() const; CharT Last() const; PRUint32 CountChar( CharT ) const; PRUint32 Left( basic_nsAWritableString&, PRUint32 ) const; PRUint32 Mid( basic_nsAWritableString&, PRUint32, PRUint32 ) const; PRUint32 Right( basic_nsAWritableString&, PRUint32 ) const; // Find( ... ) const; // FindChar( ... ) const; // FindCharInSet( ... ) const; // RFind( ... ) const; // RFindChar( ... ) const; // RFindCharInSet( ... ) const; int Compare( const basic_nsAReadableString& rhs ) const; int Compare( const basic_nsLiteralString& rhs ) const; // |Equals()| is a synonym for |Compare()| PRBool Equals( const basic_nsAReadableString& rhs ) const { return Compare(rhs) == 0; } PRBool Equals( const basic_nsLiteralString& rhs ) const { return Compare(rhs) == 0; } /* Shouldn't be implemented because they're i18n sensitive. Let's leave them in |nsString| for now. */ // ToLowerCase // ToUpperCase // EqualsIgnoreCase // IsASCII // IsSpace // IsAlpha // IsDigit // ToFloat // ToInteger // char* ToNewCString() const; // char* ToNewUTF8String() const; // PRUnichar* ToNewUnicode() const; // char* ToCString( char*, PRUint32, PRUint32 ) const; /* Shouldn't be implemented because it's wrong duplication. Let's leave it in |nsString| for now. */ // nsString* ToNewString() const; // NO! The right way to say this is |new nsString( fromAReadableString )| /* Shouldn't be implemented because they're not generally applicable. Let's leave them in |nsString| for now. 
*/ // IsOrdered // BinarySearch // Comparison operators are all synonyms for |Compare()| PRBool operator!=( const basic_nsAReadableString& rhs ) const { return Compare(rhs)!=0; } PRBool operator< ( const basic_nsAReadableString& rhs ) const { return Compare(rhs)< 0; } PRBool operator<=( const basic_nsAReadableString& rhs ) const { return Compare(rhs)<=0; } PRBool operator==( const basic_nsAReadableString& rhs ) const { return Compare(rhs)==0; } PRBool operator>=( const basic_nsAReadableString& rhs ) const { return Compare(rhs)>=0; } PRBool operator> ( const basic_nsAReadableString& rhs ) const { return Compare(rhs)> 0; } }; #define NS_DEF_1_STRING_COMPARISON_OPERATOR(comp, T1, T2) \ template \ inline \ PRBool \ operator comp( T1 lhs, T2 rhs ) \ { \ return PRBool(Compare(lhs, rhs) comp 0); \ } #define NS_DEF_STRING_COMPARISON_OPERATORS(T1, T2) \ NS_DEF_1_STRING_COMPARISON_OPERATOR(!=, T1, T2) \ NS_DEF_1_STRING_COMPARISON_OPERATOR(< , T1, T2) \ NS_DEF_1_STRING_COMPARISON_OPERATOR(<=, T1, T2) \ NS_DEF_1_STRING_COMPARISON_OPERATOR(==, T1, T2) \ NS_DEF_1_STRING_COMPARISON_OPERATOR(>=, T1, T2) \ NS_DEF_1_STRING_COMPARISON_OPERATOR(> , T1, T2) #define NS_DEF_STRING_COMPARISONS(T) \ NS_DEF_STRING_COMPARISON_OPERATORS(const T&, const CharT*) \ NS_DEF_STRING_COMPARISON_OPERATORS(const CharT*, const T&) NS_DEF_STRING_COMPARISONS(basic_nsAReadableString) NS_SPECIALIZE_TEMPLATE inline PRBool basic_nsAReadableString::IsUnicode() const { return PR_TRUE; } NS_SPECIALIZE_TEMPLATE inline const char* basic_nsAReadableString::GetBuffer() const // DEPRECATED: use the iterators instead { ConstFragment fragment; GetConstFragment(fragment, kFirstFragment); return fragment.mStart; } NS_SPECIALIZE_TEMPLATE inline const PRUnichar* basic_nsAReadableString::GetUnicode() const // DEPRECATED: use the iterators instead { ConstFragment fragment; GetConstFragment(fragment, kFirstFragment); return fragment.mStart; } template const char* basic_nsAReadableString::Implementation() const { return 0; } 
/* Note: the following four functions, |CharAt|, |operator[]|, |First|, and |Last|, are implemented in the simplest reasonable scheme; by calling |GetConstFragment| and resolving the pointer it returns. The alternative is to force at least one of these methods to be |virtual|. The ideal candidate for that change would be |CharAt|. This is something to measure in the context of how string classes are actually used. In practice, do people extract a character at a time in performance critical places? If so, can they use iterators instead? If they must extract single characters, _and_ they can't use iterators, _and_ it happens enough to notice, then we'll take the hit and make |CharAt| virtual. */ template inline CharT basic_nsAReadableString::CharAt( PRUint32 aIndex ) const { // ??? Is |CharAt()| supposed to be the 'safe' version? ConstFragment fragment; return *GetConstFragment(fragment, kFragmentAt, aIndex); } template inline CharT basic_nsAReadableString::operator[]( PRUint32 aIndex ) const { return CharAt(aIndex); } template inline CharT basic_nsAReadableString::First() const { return CharAt(0); } template inline CharT basic_nsAReadableString::Last() const { return CharAt(Length()-1); } template PRUint32 basic_nsAReadableString::CountChar( CharT c ) const { return PRUint32(count(Begin(), End(), c)); } /* Note: |Left()|, |Mid()|, and |Right()| could be modified to notice when they degenerate into copying the entire string, and call |Assign()| instead. This would be a win when the underlying implementation of both strings could do buffer sharing. This is _definitely_ something that should be measured before being implemented. 
*/ template PRUint32 basic_nsAReadableString::Left( basic_nsAWritableString& aResult, PRUint32 aLengthToCopy ) const { aResult = Substring(*this, 0, aLengthToCopy); return aResult.Length(); } template PRUint32 basic_nsAReadableString::Mid( basic_nsAWritableString& aResult, PRUint32 aStartPos, PRUint32 aLengthToCopy ) const { aResult = Substring(*this, aStartPos, aLengthToCopy); return aResult.Length(); } template PRUint32 basic_nsAReadableString::Right( basic_nsAWritableString& aResult, PRUint32 aLengthToCopy ) const { PRUint32 myLength = Length(); aLengthToCopy = min(myLength, aLengthToCopy); aResult = Substring(*this, myLength-aLengthToCopy, aLengthToCopy); return aResult.Length(); } template inline int basic_nsAReadableString::Compare( const basic_nsAReadableString& rhs ) const { return ::Compare(*this, rhs); } template inline int basic_nsAReadableString::Compare( const basic_nsLiteralString& rhs ) const { return ::Compare(*this, rhs); } // // nsLiteral[C]String // template class basic_nsLiteralString : public basic_nsAReadableString /* ...this class wraps a constant literal string and lets it act like an |nsAReadable...|. Use it like this: SomeFunctionTakingACString( nsLiteralCString("Hello, World!") ); With some tweaking, I think I can make this work as well... SomeStringFunc( nsLiteralString( L"Hello, World!" ) ); This class just holds a pointer. If you don't supply the length, it must calculate it. No copying or allocations are performed. |const basic_nsLiteralString&| appears frequently in interfaces because it allows the automatic conversion of a |CharT*|. 
*/ { typedef typename basic_nsAReadableString::FragmentRequest FragmentRequest; typedef typename basic_nsAWritableString::ConstFragment ConstFragment; protected: virtual const CharT* GetConstFragment( ConstFragment&, FragmentRequest, PRUint32 ) const; public: // Note: _not_ explicit basic_nsLiteralString( const CharT* aLiteral ) : mStart(aLiteral), mEnd(mStart + char_traits::length(mStart)) { // nothing else to do here } basic_nsLiteralString( const CharT* aLiteral, PRUint32 aLength ) : mStart(aLiteral) mEnd(mStart + aLength) { // nothing else to do here } virtual PRUint32 Length() const; private: const CharT* mStart; const CharT* mEnd; }; NS_DEF_STRING_COMPARISONS(basic_nsLiteralString) template const CharT* basic_nsLiteralString::GetConstFragment( ConstFragment& aFragment, FragmentRequest aRequest, PRUint32 aOffset ) const { switch ( aRequest ) { case kFirstFragment: case kLastFragment: case kFragmentAt: aFragment.mStart = mStart; aFragment.mEnd = mEnd; return mStart + aOffset; case kPrevFragment: case kNextFragment: default: return 0; } } template PRUint32 basic_nsLiteralString::Length() const { return PRUint32(mEnd - mStart); } // // nsPromiseConcatenation // template class nsPromiseConcatenation : public basic_nsAReadableString /* NOT FOR USE BY HUMANS Instances of this class only exist as anonymous temporary results from |operator+()|. This is the machinery that makes string concatenation efficient. No allocations or character copies are required unless and until a final assignment is made. It works its magic by overriding and forwarding calls to |GetConstFragment()|. Note: |nsPromiseConcatenation| imposes some limits on string concatenation with |operator+()|. - no more than 33 strings, e.g., |s1 + s2 + s3 + ... s32 + s33| - left to right evaluation is required ... do not use parentheses to override this In practice, neither of these is onerous. Parentheses do not change the semantics of the concatenation, only the order in which the result is assembled ... 
so there's no reason for a user to need to control it. Too many strings summed together can easily be worked around with an intermediate assignment. I wouldn't have the parentheses limitation if I assigned the identifier mask starting at the top, the first time anybody called |GetConstFragment()|. */ { typedef typename basic_nsAReadableString::FragmentRequest FragmentRequest; typedef typename basic_nsAWritableString::ConstFragment ConstFragment; protected: virtual const CharT* GetConstFragment( ConstFragment&, FragmentRequest, PRUint32 ) const; static const int kLeftString = 0; static const int kRightString = 1; int GetCurrentStringFromFragment( const ConstFragment& aFragment ) const { return (aFragment.mFragmentIdentifier & mFragmentIdentifierMask) ? kRightString : kLeftString; } int SetLeftStringInFragment( ConstFragment& aFragment ) const { aFragment.mFragmentIdentifier &= ~mFragmentIdentifierMask; return kLeftString; } int SetRightStringInFragment( ConstFragment& aFragment ) const { aFragment.mFragmentIdentifier |= mFragmentIdentifierMask; return kRightString; } public: nsPromiseConcatenation( const basic_nsAReadableString& aLeftString, const basic_nsAReadableString& aRightString, PRUint32 aMask = 1 ) : mFragmentIdentifierMask(aMask) { mStrings[kLeftString] = &aLeftString; mStrings[kRightString] = &aRightString; } virtual PRUint32 Length() const; nsPromiseConcatenation operator+( const basic_nsAReadableString& rhs ) const; private: void operator+( const nsPromiseConcatenation& ); // NOT TO BE IMPLEMENTED // making this |private| stops you from over parenthesizing concatenation expressions, e.g., |(A+B) + (C+D)| // which would break the algorithm for distributing bits in the fragment identifier private: const basic_nsAReadableString* mStrings[2]; PRUint32 mFragmentIdentifierMask; }; NS_DEF_STRING_COMPARISONS(nsPromiseConcatenation) template PRUint32 nsPromiseConcatenation::Length() const { return mStrings[kLeftString]->Length() + 
mStrings[kRightString]->Length(); } template const CharT* nsPromiseConcatenation::GetConstFragment( ConstFragment& aFragment, FragmentRequest aRequest, PRUint32 aPosition ) const { const int kLeftString = 0; const int kRightString = 1; int whichString; // based on the request, pick which string we will forward the |GetConstFragment()| call into switch ( aRequest ) { case kPrevFragment: case kNextFragment: whichString = GetCurrentStringFromFragment(aFragment); break; case kFirstFragment: whichString = SetLeftStringInFragment(aFragment); break; case kLastFragment: whichString = SetRightStringInFragment(aFragment); break; case kFragmentAt: PRUint32 leftLength = mStrings[kLeftString]->Length(); if ( aPosition < leftLength ) whichString = SetLeftStringInFragment(aFragment); else { whichString = SetRightStringInFragment(aFragment); aPosition -= leftLength; } break; } const CharT* result; bool done; do { done = true; result = mStrings[whichString]->GetConstFragment(aFragment, aRequest, aPosition); if ( !result ) { done = false; if ( aRequest == kNextFragment && whichString == kLeftString ) { aRequest = kFirstFragment; whichString = SetRightStringInFragment(aFragment); } else if ( aRequest == kPrevFragment && whichString == kRightString ) { aRequest = kLastFragment; whichString = SetLeftStringInFragment(aFragment); } else done = true; } } while ( !done ); return result; } template nsPromiseConcatenation nsPromiseConcatenation::operator+( const basic_nsAReadableString& rhs ) const { return nsPromiseConcatenation(*this, rhs, mFragmentIdentifierMask<<1); } // // nsPromiseSubstring // template class nsPromiseSubstring : public basic_nsAReadableString /* NOT FOR USE BY HUMANS (mostly) ...not unlike |nsPromiseConcatenation|. Instances of this class exist only as anonymous temporary results from |Substring()|. Like |nsPromiseConcatenation|, this class only holds a pointer, no string data of its own. It does its magic by overriding and forwarding calls to |GetConstFragment()|. 
*/ { typedef typename basic_nsAReadableString::FragmentRequest FragmentRequest; typedef typename basic_nsAWritableString::ConstFragment ConstFragment; protected: virtual const CharT* GetConstFragment( ConstFragment&, FragmentRequest, PRUint32 ) const; public: nsPromiseSubstring( const basic_nsAReadableString& aString, PRUint32 aStartPos, PRUint32 aLength ) : mString(aString), mStartPos( min(aStartPos, aString.Length()) ), mLength( min(aLength, aString.Length()-mStartPos) ) { // nothing else to do here } virtual PRUint32 Length() const; private: const basic_nsAReadableString& mString; PRUint32 mStartPos; PRUint32 mLength; }; NS_DEF_STRING_COMPARISONS(nsPromiseSubstring) template PRUint32 nsPromiseSubstring::Length() const { return mLength; } template const CharT* nsPromiseSubstring::GetConstFragment( ConstFragment& aFragment, FragmentRequest aRequest, PRUint32 aPosition ) const { // Offset any request for a specific position (First, Last, At) by our // substrings startpos within the owning string if ( aRequest == kFirstFragment ) { aPosition = mStartPos; aRequest = kFragmentAt; } else if ( aRequest == kLastFragment ) { aPosition = mStartPos + mLength; aRequest = kFragmentAt; } else if ( aRequest == kFragmentAt ) aPosition += mStartPos; return mString.GetConstFragment(aFragment, aRequest, aPosition); } // // Global functions // template nsPromiseSubstring Substring( const basic_nsAReadableString& aString, PRUint32 aStartPos, PRUint32 aSubstringLength ) { return nsPromiseSubstring(aString, aStartPos, aSubstringLength); } template int Compare( const basic_nsAReadableString& lhs, const basic_nsAReadableString& rhs ) { /* If this turns out to be too slow (after measurement), there are two important modifications 1) chunky iterators 2) and then possibly use |char_traits::compare| */ PRUint32 lLength = lhs.Length(); PRUint32 rLength = rhs.Length(); PRUint32 lengthToCompare = min(lLength, rLength); typedef typename basic_nsAReadableString::ConstIterator ConstIterator; 
ConstIterator lPos = lhs.Begin(); ConstIterator lEnd = lhs.Begin(lengthToCompare); ConstIterator rPos = rhs.Begin(); while ( lPos != lEnd ) { if ( *lPos < *rPos ) return -1; if ( *rPos < *lPos ) return 1; ++lPos; ++rPos; } if ( lLength < rLength ) return -1; else if ( rLength < lLength ) return 1; else return 0; } template inline int Compare( const basic_nsAReadableString& lhs, const CharT* rhs ) { return Compare(lhs, basic_nsLiteralString(rhs)); } template inline int Compare( const CharT* lhs, const basic_nsAReadableString& rhs ) { return Compare(basic_nsLiteralString(lhs), rhs); } /* How shall we provide |operator+()|? What would it return? It has to return a stack based object, because the client will not be given an opportunity to handle memory management in an expression like myWritableString = stringA + stringB + stringC; ...so the `obvious' answer of returning a new |nsSharedString| is no good. We could return an |nsString|, if that name were in scope here, though there's no telling what the client will really want to do with the result. What might be better, though, is to return a `promise' to concatenate some strings... By making |nsPromiseConcatenation| inherit from readable strings, we automatically handle assignment and other interesting uses within writable strings, plus we drastically reduce the number of cases we have to write |operator+()| for. The cost is extra temporary concat strings in the evaluation of strings of '+'s, e.g., |A + B + C + D|, and that we have to do some work to implement the virtual functions of readables. 
*/ template nsPromiseConcatenation operator+( const basic_nsAReadableString& lhs, const basic_nsAReadableString& rhs ) { return nsPromiseConcatenation(lhs, rhs); } template nsPromiseConcatenation operator+( const basic_nsAReadableString& lhs, const basic_nsLiteralString& rhs ) { return nsPromiseConcatenation(lhs, rhs); } template nsPromiseConcatenation operator+( const basic_nsLiteralString& lhs, const basic_nsAReadableString& rhs ) { return nsPromiseConcatenation(lhs, rhs); } template nsPromiseConcatenation operator+( const basic_nsLiteralString& lhs, const basic_nsLiteralString& rhs ) { return nsPromiseConcatenation(lhs, rhs); } template basic_ostream& operator<<( basic_ostream& os, const basic_nsAReadableString& s ) { std::copy(s.Begin(), s.End(), ostream_iterator(os)); return os; } typedef basic_nsAReadableString nsAReadableString; typedef basic_nsAReadableString nsAReadableCString; typedef basic_nsLiteralString nsLiteralString; typedef basic_nsLiteralString nsLiteralCString; #endif // !defined(_nsAReadableString_h__)