1998-04-14 00:24:54 +04:00
|
|
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
2012-05-21 15:12:37 +04:00
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
1998-04-14 00:24:54 +04:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* MODULE NOTES:
|
|
|
|
* @update gess 4/1/98
|
|
|
|
*
|
|
|
|
* The scanner is a low-level service class that knows
|
|
|
|
* how to consume characters out of an (internal) stream.
|
|
|
|
* This class also offers a series of utility methods
|
2007-06-27 04:21:47 +04:00
|
|
|
* that most tokenizers want, such as ReadUntil()
|
|
|
|
* and SkipWhitespace().
|
1998-04-14 00:24:54 +04:00
|
|
|
*/
|
|
|
|
|
1998-05-07 11:19:47 +04:00
|
|
|
|
1998-04-14 00:24:54 +04:00
|
|
|
#ifndef SCANNER
|
|
|
|
#define SCANNER
|
|
|
|
|
2003-03-15 04:04:32 +03:00
|
|
|
#include "nsCOMPtr.h"
|
1998-04-14 00:24:54 +04:00
|
|
|
#include "nsString.h"
|
1999-01-09 04:09:02 +03:00
|
|
|
#include "nsIParser.h"
|
1999-02-01 21:23:31 +03:00
|
|
|
#include "nsIUnicodeDecoder.h"
|
2004-02-19 05:44:03 +03:00
|
|
|
#include "nsScannerString.h"
|
2000-12-13 00:58:14 +03:00
|
|
|
|
2001-08-16 09:24:17 +04:00
|
|
|
/**
 * Describes the set of characters that terminates a scan; instances are
 * passed to the nsScanner::ReadUntil() overloads that take an
 * nsReadEndCondition.
 *
 * Instances are neither copyable nor assignable.
 */
class nsReadEndCondition {
public:
  // Terminator characters.
  // NOTE(review): presumably this is the aTerminateChars pointer handed to
  // the constructor (not a copy), in which case the caller must keep that
  // storage alive -- confirm against the constructor's definition.
  const char16_t *mChars;

  // NOTE(review): looks like a precomputed value derived from mChars that
  // lets the scanner cheaply rule out non-terminators -- the computation is
  // not visible in this header; confirm in the implementation file.
  char16_t mFilter;

  /**
   * @param aTerminateChars the characters that should end a read
   */
  explicit nsReadEndCondition(const char16_t* aTerminateChars);

private:
  // Deleted rather than merely left unimplemented, so that an accidental
  // copy is rejected at compile time instead of at link time.
  nsReadEndCondition(const nsReadEndCondition& aOther) = delete; // No copying
  void operator=(const nsReadEndCondition& aOther) = delete; // No assigning
};
|
|
|
|
|
1999-01-09 04:09:02 +03:00
|
|
|
class nsScanner {
|
1998-04-14 00:24:54 +04:00
|
|
|
public:
|
1998-05-15 02:19:08 +04:00
|
|
|
|
1998-07-25 01:57:43 +04:00
|
|
|
/**
|
2012-11-06 15:57:51 +04:00
|
|
|
* Use this constructor for the XML fragment parsing case
|
1998-07-25 01:57:43 +04:00
|
|
|
*/
|
2014-09-02 02:04:52 +04:00
|
|
|
explicit nsScanner(const nsAString& anHTMLString);
|
1998-07-25 01:57:43 +04:00
|
|
|
|
1998-05-15 02:19:08 +04:00
|
|
|
/**
|
1998-07-14 01:13:09 +04:00
|
|
|
* Use this constructor if you want i/o to be based on
|
|
|
|
* a file (therefore a stream) or just data you provide via Append().
|
1998-05-15 02:19:08 +04:00
|
|
|
*/
|
2012-11-06 15:57:51 +04:00
|
|
|
nsScanner(nsString& aFilename, bool aCreateStream);
|
1998-05-15 02:19:08 +04:00
|
|
|
|
1999-01-09 04:09:02 +03:00
|
|
|
~nsScanner();
|
1998-05-07 11:19:47 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* retrieve next char from internal input stream
|
|
|
|
*
|
|
|
|
* @update gess 3/25/98
|
|
|
|
* @param ch is the char to accept new value
|
|
|
|
* @return error code reflecting read status
|
|
|
|
*/
|
2014-01-04 19:02:17 +04:00
|
|
|
nsresult GetChar(char16_t& ch);
|
1998-05-07 11:19:47 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* peek ahead to consume next char from scanner's internal
|
|
|
|
* input buffer
|
|
|
|
*
|
|
|
|
* @update gess 3/25/98
|
|
|
|
* @param ch is the char to accept new value
|
|
|
|
* @return error code reflecting read status
|
|
|
|
*/
|
2014-01-04 19:02:17 +04:00
|
|
|
nsresult Peek(char16_t& ch, uint32_t aOffset=0);
|
1998-05-07 11:19:47 +04:00
|
|
|
|
2012-08-22 19:56:38 +04:00
|
|
|
nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0);
|
1998-05-07 11:19:47 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Skip over chars as long as they equal given char
|
|
|
|
*
|
|
|
|
* @update gess 3/25/98
|
|
|
|
* @param char to be skipped
|
|
|
|
* @return error code
|
|
|
|
*/
|
2014-01-04 19:02:17 +04:00
|
|
|
nsresult SkipOver(char16_t aSkipChar);
|
1998-05-07 11:19:47 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Skip whitespace on scanner input stream
|
|
|
|
*
|
|
|
|
* @update gess 3/25/98
|
|
|
|
* @return error status
|
|
|
|
*/
|
2012-08-22 19:56:38 +04:00
|
|
|
nsresult SkipWhitespace(int32_t& aNewlinesSkipped);
|
1998-05-07 11:19:47 +04:00
|
|
|
|
1999-09-30 08:04:53 +04:00
|
|
|
/**
|
2004-05-02 15:16:26 +04:00
|
|
|
* Consume characters until you run into space, a '<', a '>', or a '/'.
|
1999-09-30 08:04:53 +04:00
|
|
|
*
|
2004-05-02 15:16:26 +04:00
|
|
|
* @param aString - receives new data from stream
|
|
|
|
* @return error code
|
|
|
|
*/
|
2004-11-25 10:03:20 +03:00
|
|
|
nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString);
|
2004-05-02 15:16:26 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Consume characters until you run into a char that's not valid in an
|
|
|
|
* entity name
|
|
|
|
*
|
|
|
|
* @param aString - receives new data from stream
|
1999-09-30 08:04:53 +04:00
|
|
|
* @return error code
|
|
|
|
*/
|
2004-05-02 15:16:26 +04:00
|
|
|
nsresult ReadEntityIdentifier(nsString& aString);
|
2012-08-22 19:56:38 +04:00
|
|
|
nsresult ReadNumber(nsString& aString,int32_t aBase);
|
2004-11-25 10:03:20 +03:00
|
|
|
nsresult ReadWhitespace(nsScannerSharedSubstring& aString,
|
2012-08-22 19:56:38 +04:00
|
|
|
int32_t& aNewlinesSkipped,
|
2011-09-29 10:19:26 +04:00
|
|
|
bool& aHaveCR);
|
2004-02-19 05:44:03 +03:00
|
|
|
nsresult ReadWhitespace(nsScannerIterator& aStart,
|
|
|
|
nsScannerIterator& aEnd,
|
2012-08-22 19:56:38 +04:00
|
|
|
int32_t& aNewlinesSkipped);
|
1999-09-30 08:04:53 +04:00
|
|
|
|
1998-05-07 11:19:47 +04:00
|
|
|
/**
|
|
|
|
* Consume characters until you find the terminal char
|
|
|
|
*
|
|
|
|
* @update gess 3/25/98
|
|
|
|
* @param aString receives new data from stream
|
|
|
|
* @param aTerminal contains terminating char
|
|
|
|
* @param addTerminal tells us whether to append terminal to aString
|
|
|
|
* @return error code
|
|
|
|
*/
|
2002-03-24 03:16:18 +03:00
|
|
|
nsresult ReadUntil(nsAString& aString,
|
2014-01-04 19:02:17 +04:00
|
|
|
char16_t aTerminal,
|
2011-09-29 10:19:26 +04:00
|
|
|
bool addTerminal);
|
1998-05-07 11:19:47 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Consume characters until you find one contained in given
|
|
|
|
* terminal set.
|
|
|
|
*
|
|
|
|
* @update gess 3/25/98
|
|
|
|
* @param aString receives new data from stream
|
|
|
|
* @param aTermSet contains set of terminating chars
|
|
|
|
* @param addTerminal tells us whether to append terminal to aString
|
|
|
|
* @return error code
|
|
|
|
*/
|
2002-03-24 03:16:18 +03:00
|
|
|
nsresult ReadUntil(nsAString& aString,
|
2001-08-16 09:24:17 +04:00
|
|
|
const nsReadEndCondition& aEndCondition,
|
2011-09-29 10:19:26 +04:00
|
|
|
bool addTerminal);
|
2001-04-11 06:28:17 +04:00
|
|
|
|
2004-11-25 10:03:20 +03:00
|
|
|
nsresult ReadUntil(nsScannerSharedSubstring& aString,
|
|
|
|
const nsReadEndCondition& aEndCondition,
|
2011-09-29 10:19:26 +04:00
|
|
|
bool addTerminal);
|
2004-11-25 10:03:20 +03:00
|
|
|
|
2004-02-19 05:44:03 +03:00
|
|
|
nsresult ReadUntil(nsScannerIterator& aStart,
|
|
|
|
nsScannerIterator& aEnd,
|
2001-08-16 09:24:17 +04:00
|
|
|
const nsReadEndCondition& aEndCondition,
|
2011-09-29 10:19:26 +04:00
|
|
|
bool addTerminal);
|
2000-12-13 00:58:14 +03:00
|
|
|
|
1998-05-15 02:19:08 +04:00
|
|
|
/**
|
|
|
|
* Records current offset position in input stream. This allows us
|
|
|
|
* to back up to this point if the need should arise, such as when
|
|
|
|
* tokenization gets interrupted.
|
|
|
|
*
|
|
|
|
* @update gess 5/12/98
|
|
|
|
* @param
|
|
|
|
* @return
|
|
|
|
*/
|
2012-08-22 19:56:38 +04:00
|
|
|
int32_t Mark(void);
|
1998-05-15 02:19:08 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Resets current offset position of input stream to marked position.
|
|
|
|
* This allows us to back up to this point if the need should arise,
|
|
|
|
* such as when tokenization gets interrupted.
|
|
|
|
* NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
|
|
|
|
*
|
|
|
|
* @update gess 5/12/98
|
|
|
|
* @param
|
|
|
|
* @return
|
|
|
|
*/
|
2000-12-13 00:58:14 +03:00
|
|
|
void RewindToMark(void);
|
1998-05-15 02:19:08 +04:00
|
|
|
|
|
|
|
|
2000-01-15 23:35:57 +03:00
|
|
|
/**
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* @update harishd 01/12/99
|
|
|
|
* @param
|
|
|
|
* @return
|
|
|
|
*/
|
2011-09-29 10:19:26 +04:00
|
|
|
bool UngetReadable(const nsAString& aBuffer);
|
2000-01-15 23:35:57 +03:00
|
|
|
|
1998-05-15 02:19:08 +04:00
|
|
|
/**
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* @update gess 5/13/98
|
|
|
|
* @param
|
|
|
|
* @return
|
|
|
|
*/
|
2002-03-24 03:16:18 +03:00
|
|
|
nsresult Append(const nsAString& aBuffer);
|
1998-05-15 02:19:08 +04:00
|
|
|
|
1998-05-22 00:38:32 +04:00
|
|
|
/**
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* @update gess 5/21/98
|
|
|
|
* @param
|
|
|
|
* @return
|
|
|
|
*/
|
2012-08-22 19:56:38 +04:00
|
|
|
nsresult Append(const char* aBuffer, uint32_t aLen,
|
2004-11-05 09:50:27 +03:00
|
|
|
nsIRequest *aRequest);
|
1998-11-06 05:07:17 +03:00
|
|
|
|
1999-02-01 07:24:37 +03:00
|
|
|
/**
|
|
|
|
* Call this to copy bytes out of the scanner that have not yet been consumed
|
|
|
|
* by the tokenization process.
|
|
|
|
*
|
|
|
|
* @update gess 5/12/98
|
|
|
|
* @param aCopyBuffer is where the scanner buffer will be copied to
|
|
|
|
* @return nada
|
|
|
|
*/
|
|
|
|
void CopyUnusedData(nsString& aCopyBuffer);
|
|
|
|
|
1998-07-02 12:14:22 +04:00
|
|
|
/**
|
1998-07-14 01:13:09 +04:00
|
|
|
* Retrieve the name of the file that the scanner is reading from.
|
|
|
|
* In some cases, it's just a given name, because the scanner isn't
|
|
|
|
* really reading from a file.
|
1998-07-02 12:14:22 +04:00
|
|
|
*
|
|
|
|
* @update gess 5/12/98
|
|
|
|
* @return
|
|
|
|
*/
|
1998-07-14 01:13:09 +04:00
|
|
|
nsString& GetFilename(void);
|
1998-07-02 12:14:22 +04:00
|
|
|
|
1998-04-14 00:24:54 +04:00
|
|
|
static void SelfTest();
|
|
|
|
|
1999-02-01 21:23:31 +03:00
|
|
|
/**
|
1999-02-16 21:32:02 +03:00
|
|
|
* Use this setter to change the scanner's unicode decoder
|
|
|
|
*
|
1999-03-08 23:00:23 +03:00
|
|
|
* @update ftang 3/02/99
|
1999-02-16 21:32:02 +03:00
|
|
|
* @param aCharset a normalized (alias resolved) charset name
|
1999-03-08 23:00:23 +03:00
|
|
|
* @param aCharsetSource- where the charset info came from
|
1999-02-16 21:32:02 +03:00
|
|
|
* @return
|
|
|
|
*/
|
2012-08-22 19:56:38 +04:00
|
|
|
nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
|
1999-02-01 21:23:31 +03:00
|
|
|
|
2004-02-19 05:44:03 +03:00
|
|
|
void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
|
|
|
|
void CurrentPosition(nsScannerIterator& aPosition);
|
|
|
|
void EndReading(nsScannerIterator& aPosition);
|
|
|
|
void SetPosition(nsScannerIterator& aPosition,
|
2011-09-29 10:19:26 +04:00
|
|
|
bool aTruncate = false,
|
|
|
|
bool aReverse = false);
|
2004-02-19 05:44:03 +03:00
|
|
|
void ReplaceCharacter(nsScannerIterator& aPosition,
|
2014-01-04 19:02:17 +04:00
|
|
|
char16_t aChar);
|
2000-12-13 00:58:14 +03:00
|
|
|
|
1999-07-25 21:23:24 +04:00
|
|
|
/**
|
|
|
|
* Internal method used to cause the internal buffer to
|
|
|
|
* be filled with data.
|
|
|
|
*
|
|
|
|
* @update gess4/3/98
|
|
|
|
*/
|
2011-09-29 10:19:26 +04:00
|
|
|
bool IsIncremental(void) {return mIncremental;}
|
|
|
|
void SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;}
|
1999-07-25 21:23:24 +04:00
|
|
|
|
2004-08-24 22:37:33 +04:00
|
|
|
/**
|
|
|
|
* Return the position of the first non-whitespace
|
|
|
|
* character. This is only reliable before consumers start
|
|
|
|
* reading from this scanner.
|
|
|
|
*/
|
2012-08-22 19:56:38 +04:00
|
|
|
int32_t FirstNonWhitespacePosition()
|
2004-08-24 22:37:33 +04:00
|
|
|
{
|
|
|
|
return mFirstNonWhitespacePosition;
|
|
|
|
}
|
|
|
|
|
2009-02-16 15:22:47 +03:00
|
|
|
/**
|
|
|
|
* Override replacement character used by nsIUnicodeDecoder.
|
|
|
|
* Default behavior is that it uses nsIUnicodeDecoder's mapping.
|
|
|
|
*
|
|
|
|
* @param aReplacementCharacter the replacement character
|
|
|
|
* XML (expat) parser uses 0xffff
|
|
|
|
*/
|
2014-01-04 19:02:17 +04:00
|
|
|
void OverrideReplacementCharacter(char16_t aReplacementCharacter);
|
2009-02-16 15:22:47 +03:00
|
|
|
|
2005-01-04 01:06:27 +03:00
|
|
|
protected:
|
|
|
|
|
2012-08-22 19:56:38 +04:00
|
|
|
bool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, int32_t aErrorPos = -1);
|
2011-09-29 10:19:26 +04:00
|
|
|
bool AppendToBuffer(const nsAString& aStr)
|
2004-11-05 09:50:27 +03:00
|
|
|
{
|
2008-07-14 17:05:15 +04:00
|
|
|
nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
|
|
|
|
if (!buf)
|
2011-10-17 18:59:28 +04:00
|
|
|
return false;
|
2012-07-30 18:20:58 +04:00
|
|
|
AppendToBuffer(buf, nullptr);
|
2011-10-17 18:59:28 +04:00
|
|
|
return true;
|
2004-11-05 09:50:27 +03:00
|
|
|
}
|
2000-12-13 00:58:14 +03:00
|
|
|
|
|
|
|
nsScannerString* mSlidingBuffer;
|
2004-02-19 05:44:03 +03:00
|
|
|
nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer
|
|
|
|
nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here)
|
|
|
|
nsScannerIterator mEndPosition; // The current end of the scanner buffer
|
2009-02-16 15:22:47 +03:00
|
|
|
nsScannerIterator mFirstInvalidPosition; // The position of the first invalid character that was detected
|
1998-07-14 01:13:09 +04:00
|
|
|
nsString mFilename;
|
2012-08-22 19:56:38 +04:00
|
|
|
uint32_t mCountRemaining; // The number of bytes still to be read
|
2000-12-13 00:58:14 +03:00
|
|
|
// from the scanner buffer
|
2011-09-29 10:19:26 +04:00
|
|
|
bool mIncremental;
|
|
|
|
bool mHasInvalidCharacter;
|
2014-01-04 19:02:17 +04:00
|
|
|
char16_t mReplacementCharacter;
|
2012-08-22 19:56:38 +04:00
|
|
|
int32_t mFirstNonWhitespacePosition;
|
|
|
|
int32_t mCharsetSource;
|
2003-06-11 22:16:03 +04:00
|
|
|
nsCString mCharset;
|
2008-10-01 10:48:47 +04:00
|
|
|
nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
|
|
|
|
|
|
|
|
private:
|
|
|
|
nsScanner &operator =(const nsScanner &); // Not implemented.
|
1998-04-14 00:24:54 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|
1998-04-25 23:45:14 +04:00
|
|
|
|
|
|
|
|