зеркало из https://github.com/mozilla/gecko-dev.git
189 строки
5.2 KiB
C++
189 строки
5.2 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
/**
|
|
* MODULE NOTES:
|
|
* @update gess 4/1/98
|
|
*
|
|
* The scanner is a low-level service class that knows
|
|
* how to consume characters out of an (internal) stream.
|
|
* This class also offers a series of utility methods
|
|
* that most tokenizers want, such as readUntil()
|
|
* and SkipWhitespace().
|
|
*/
|
|
|
|
#ifndef SCANNER
|
|
#define SCANNER
|
|
|
|
#include "nsCharsetSource.h"
|
|
#include "nsCOMPtr.h"
|
|
#include "nsString.h"
|
|
#include "nsIParser.h"
|
|
#include "mozilla/Encoding.h"
|
|
#include "nsScannerString.h"
|
|
#include "mozilla/CheckedInt.h"
|
|
|
|
/**
 * Describes a set of terminating characters for a read operation.
 *
 * Instances are non-copyable: both copy construction and copy assignment
 * are deleted.
 */
class nsReadEndCondition {
 public:
  // NOTE(review): presumably the terminator characters passed to the
  // constructor; storage/ownership is not visible in this header -- confirm
  // against the constructor definition.
  const char16_t* mChars;

  // NOTE(review): presumably a precomputed filter derived from the
  // terminator set; its exact semantics are defined by the constructor,
  // which is not visible here -- confirm.
  char16_t mFilter;

  /**
   * @param aTerminateChars the characters that terminate a read.
   */
  explicit nsReadEndCondition(const char16_t* aTerminateChars);

  // No copying, no assigning. Deleted (rather than private and
  // unimplemented) so misuse is diagnosed at compile time, including from
  // members and friends.
  nsReadEndCondition(const nsReadEndCondition& aOther) = delete;
  void operator=(const nsReadEndCondition& aOther) = delete;
};
|
|
|
|
class nsScanner final {
|
|
using Encoding = mozilla::Encoding;
|
|
template <typename T>
|
|
using NotNull = mozilla::NotNull<T>;
|
|
|
|
public:
|
|
/**
|
|
* Use this constructor for the XML fragment parsing case
|
|
*/
|
|
nsScanner(const nsAString& anHTMLString, bool aIncremental);
|
|
|
|
/**
|
|
* Use this constructor if you want i/o to be based on
|
|
* a file (therefore a stream) or just data you provide via Append().
|
|
*/
|
|
explicit nsScanner(nsIURI* aURI);
|
|
|
|
~nsScanner();
|
|
|
|
/**
|
|
* retrieve next char from internal input stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param ch is the char to accept new value
|
|
* @return error code reflecting read status
|
|
*/
|
|
nsresult GetChar(char16_t& ch);
|
|
|
|
/**
|
|
* Records current offset position in input stream. This allows us
|
|
* to back up to this point if the need should arise, such as when
|
|
* tokenization gets interrupted.
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
int32_t Mark(void);
|
|
|
|
/**
|
|
* Resets current offset position of input stream to marked position.
|
|
* This allows us to back up to this point if the need should arise,
|
|
* such as when tokenization gets interrupted.
|
|
* NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void RewindToMark(void);
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update harishd 01/12/99
|
|
* @param
|
|
* @return
|
|
*/
|
|
bool UngetReadable(const nsAString& aBuffer);
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/13/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult Append(const nsAString& aBuffer);
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/21/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult Append(const char* aBuffer, uint32_t aLen);
|
|
|
|
/**
|
|
* Call this to copy bytes out of the scanner that have not yet been consumed
|
|
* by the tokenization process.
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param aCopyBuffer is where the scanner buffer will be copied to
|
|
* @return true if OK or false on OOM
|
|
*/
|
|
bool CopyUnusedData(nsString& aCopyBuffer);
|
|
|
|
/**
|
|
* Retrieve the URI of the file that the scanner is reading from.
|
|
* In some cases, it's just a given name, because the scanner isn't
|
|
* really reading from a file.
|
|
*/
|
|
nsIURI* GetURI(void) const { return mURI; }
|
|
|
|
static void SelfTest();
|
|
|
|
/**
|
|
* Use this setter to change the scanner's unicode decoder
|
|
*
|
|
* @update ftang 3/02/99
|
|
* @param aCharset a normalized (alias resolved) charset name
|
|
* @param aCharsetSource- where the charset info came from
|
|
* @return
|
|
*/
|
|
nsresult SetDocumentCharset(NotNull<const Encoding*> aEncoding,
|
|
int32_t aSource);
|
|
|
|
void BindSubstring(nsScannerSubstring& aSubstring,
|
|
const nsScannerIterator& aStart,
|
|
const nsScannerIterator& aEnd);
|
|
void CurrentPosition(nsScannerIterator& aPosition);
|
|
void EndReading(nsScannerIterator& aPosition);
|
|
void SetPosition(nsScannerIterator& aPosition, bool aTruncate = false);
|
|
|
|
/**
|
|
* Internal method used to cause the internal buffer to
|
|
* be filled with data.
|
|
*
|
|
* @update gess4/3/98
|
|
*/
|
|
bool IsIncremental(void) { return mIncremental; }
|
|
void SetIncremental(bool anIncrValue) { mIncremental = anIncrValue; }
|
|
|
|
protected:
|
|
void AppendToBuffer(nsScannerString::Buffer* aBuffer);
|
|
bool AppendToBuffer(const nsAString& aStr) {
|
|
nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
|
|
if (!buf) return false;
|
|
AppendToBuffer(buf);
|
|
return true;
|
|
}
|
|
|
|
mozilla::UniquePtr<nsScannerString> mSlidingBuffer;
|
|
nsScannerIterator mCurrentPosition; // The position we will next read from in
|
|
// the scanner buffer
|
|
nsScannerIterator
|
|
mMarkPosition; // The position last marked (we may rewind to here)
|
|
nsScannerIterator mEndPosition; // The current end of the scanner buffer
|
|
nsCOMPtr<nsIURI> mURI;
|
|
bool mIncremental;
|
|
int32_t mCharsetSource = kCharsetUninitialized;
|
|
nsCString mCharset;
|
|
mozilla::UniquePtr<mozilla::Decoder> mUnicodeDecoder;
|
|
|
|
private:
|
|
nsScanner& operator=(const nsScanner&); // Not implemented.
|
|
};
|
|
|
|
#endif
|