// Source: gecko-dev/parser/html/nsHtml5TreeOpExecutor.h
//
// Blame view: ignoring revisions in .git-blame-ignore-revs. (The original page
// offered a link to bypass this and see the normal blame view.)
//
// 318 lines, 8.4 KiB, C
// Page views: Source | Normal view | History

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsHtml5TreeOpExecutor_h
#define nsHtml5TreeOpExecutor_h
#include "nsAtom.h"
#include "nsTraceRefcnt.h"
#include "nsHtml5TreeOperation.h"
#include "nsHtml5SpeculativeLoad.h"
#include "nsTArray.h"
#include "nsContentSink.h"
#include "nsNodeInfoManager.h"
#include "nsHtml5DocumentMode.h"
#include "nsIScriptElement.h"
#include "nsIParser.h"
#include "nsAHtml5TreeOpSink.h"
#include "nsHtml5TreeOpStage.h"
#include "nsIURI.h"
#include "nsTHashSet.h"
#include "nsHashKeys.h"
#include "mozilla/LinkedList.h"
#include "nsHtml5DocumentBuilder.h"
#include "nsCharsetSource.h"
// Forward declarations of types that this header only needs by name.
namespace mozilla::dom {
class Document;
}  // namespace mozilla::dom

class nsIContent;
class nsHtml5StreamParser;
class nsHtml5Parser;
class nsHtml5TreeOpExecutor final
: public nsHtml5DocumentBuilder,
public nsIContentSink,
public nsAHtml5TreeOpSink,
public mozilla::LinkedListElement<nsHtml5TreeOpExecutor> {
friend class nsHtml5FlushLoopGuard;
typedef mozilla::dom::ReferrerPolicy ReferrerPolicy;
using Encoding = mozilla::Encoding;
template <typename T>
using NotNull = mozilla::NotNull<T>;
public:
NS_DECL_ISUPPORTS_INHERITED
private:
#ifdef DEBUG_NS_HTML5_TREE_OP_EXECUTOR_FLUSH
static uint32_t sAppendBatchMaxSize;
static uint32_t sAppendBatchSlotsExamined;
static uint32_t sAppendBatchExaminations;
static uint32_t sLongestTimeOffTheEventLoop;
static uint32_t sTimesFlushLoopInterrupted;
#endif
/**
* Whether EOF needs to be suppressed
*/
bool mSuppressEOF;
bool mReadingFromStage;
nsTArray<nsHtml5TreeOperation> mOpQueue;
nsHtml5StreamParser* mStreamParser;
/**
* URLs already preloaded/preloading.
*/
nsTHashSet<nsCString> mPreloadedURLs;
nsCOMPtr<nsIURI> mSpeculationBaseURI;
nsCOMPtr<nsIURI> mViewSourceBaseURI;
/**
* Whether the parser has started
*/
bool mStarted;
nsHtml5TreeOpStage mStage;
bool mRunFlushLoopOnStack;
bool mCallContinueInterruptedParsingIfEnabled;
/**
* Whether this executor has already complained about matters related
* to character encoding declarations.
*/
bool mAlreadyComplainedAboutCharset;
/**
* Whether this executor has already complained about the tree being too
* deep.
*/
bool mAlreadyComplainedAboutDeepTree;
public:
nsHtml5TreeOpExecutor();
protected:
virtual ~nsHtml5TreeOpExecutor();
public:
// nsIContentSink
/**
* Unimplemented. For interface compat only.
*/
NS_IMETHOD WillParse() override;
/**
*
*/
NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) override;
/**
* Emits EOF.
*/
NS_IMETHOD DidBuildModel(bool aTerminated) override;
/**
* Forwards to nsContentSink
*/
NS_IMETHOD WillInterrupt() override;
/**
* Unimplemented. For interface compat only.
*/
NS_IMETHOD WillResume() override;
virtual void InitialTranslationCompleted() override;
/**
* Sets the parser.
*/
NS_IMETHOD SetParser(nsParserBase* aParser) override;
/**
* No-op for backwards compat.
*/
virtual void FlushPendingNotifications(mozilla::FlushType aType) override;
/**
* Don't call. For interface compat only.
*/
virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding) override {
MOZ_ASSERT_UNREACHABLE("No one should call this.");
}
/**
* Returns the document.
*/
virtual nsISupports* GetTarget() override;
virtual void ContinueInterruptedParsingAsync() override;
bool IsScriptExecuting() override { return IsScriptExecutingImpl(); }
// Not from interface
void SetStreamParser(nsHtml5StreamParser* aStreamParser) {
mStreamParser = aStreamParser;
}
void InitializeDocWriteParserState(nsAHtml5TreeBuilderState* aState,
int32_t aLine);
bool IsScriptEnabled();
virtual nsresult MarkAsBroken(nsresult aReason) override;
void StartLayout(bool* aInterrupted);
void PauseDocUpdate(bool* aInterrupted);
void FlushSpeculativeLoads();
void RunFlushLoop();
nsresult FlushDocumentWrite();
Bug 1701828 - meta charset rewrite. r=smaug Implements https://github.com/whatwg/html/issues/6962 . Improves performance when <meta charset> occurs in head but after the first kilobyte and aligns behavior better with WebKit and Blink. The main change is to avoid reloads when meta appears within head but after the first kilobyte. Prior to this change, Gecko reloaded in that case (in compliance with the spec!) even though WebKit and Blink did not. Differences from WebKit and Blink: * WebKit and Blink honor <meta charset> in <noscript>. This implementation does not. * WebKit and Blink look for meta as if the tree builder was unaware of foreign content. This implementation is foreign content-aware. This makes a difference for CDATA sections that contain a > before the meta as well as style and script elements within foreign content. This could happen if the CDATA section that has mysteriously been introduced around a what looks like a meta tag also contains another prior tag-looking run of text. * This implementation processes rel=preload and speculative loads that are seen before <meta charset> has been seen. WebKit and Blink instead first look for the meta and rewind before starting speculative parsing. * Unlike WebKit, if there is neither an honored meta nor syntax resembling an XML declaration, detection from content takes place (as in Blink). * Unlike Blink, if there is neither an honored meta nor syntax resembling an XML declaration, the detection from content is not dependent of network buffer boundaries. * Unlike Blink, detection from content can trigger a reload at the end of the stream if the guess made at that point differs from the first guess. (See below for the definition of the input to the first guess.) Differences from the old spec and Gecko previously: * Meta inside script and RCDATA elements is no longer honored. * Late meta is now ignored and no longer triggers a reload. 
* Later meta counts as early enough meta: In addition to the previous meta within the first 1024 bytes, now a meta that started within the first 1024 bytes counts as early enough. Additionally, if by then there hasn't been a template start tag and head hasn't ended, meta occurring before the earlier of the end of the head or a template start tag counts as early enough. * Meta now counts as not-late even if the encoding label has numeric character reference escapes. * Syntax resembling an XML declaration longer than a kilobyte is honored if there is no honored meta. * If there is neither an honored meta nor syntax resembling an XML declaration, the initial chardetng scan is potentially longer than before: the first 1024 bytes, the token spanning the 1024-byte boundary if there is such a token, and, if by then head hasn't ended and there hasn't been a template start tag until the end of the template start tag or the end of the token that causes head to end, ever comes first. However, if the token implying the end of the head is a text token, bytes only to the end of the previous non-text token is considered. (This definition avoids depending on network buffer boundaries.) * XML View Source now uses the code for syntax resembling an XML declaration instead of expat for extracting the internal encoding label. Reftest are added as both WPT and Gecko reftests in order to test both http: and file: URL scenarios. The Gecko tests retain the WPT <link> tags in order to use the exact same bytes. An encoding declaration has been added to a number of old tests that didn't intend to test the new speculation behavior especially in the context of https://bugzilla.mozilla.org/show_bug.cgi?id=1727750 . Differential Revision: https://phabricator.services.mozilla.com/D125808
2021-12-08 14:34:20 +03:00
void CommitToInternalEncoding();
void TakeOpsFromStage();
void MaybeSuspend();
void Start();
void SetDocumentCharsetAndSource(NotNull<const Encoding*> aEncoding,
nsCharsetSource aCharsetSource);
void UpdateCharsetSource(nsCharsetSource aCharsetSource);
void NeedsCharsetSwitchTo(NotNull<const Encoding*> aEncoding, int32_t aSource,
uint32_t aLineNumber);
void MaybeComplainAboutCharset(const char* aMsgId, bool aError,
uint32_t aLineNumber);
Bug 1701828 - meta charset rewrite. r=smaug Implements https://github.com/whatwg/html/issues/6962 . Improves performance when <meta charset> occurs in head but after the first kilobyte and aligns behavior better with WebKit and Blink. The main change is to avoid reloads when meta appears within head but after the first kilobyte. Prior to this change, Gecko reloaded in that case (in compliance with the spec!) even though WebKit and Blink did not. Differences from WebKit and Blink: * WebKit and Blink honor <meta charset> in <noscript>. This implementation does not. * WebKit and Blink look for meta as if the tree builder was unaware of foreign content. This implementation is foreign content-aware. This makes a difference for CDATA sections that contain a > before the meta as well as style and script elements within foreign content. This could happen if the CDATA section that has mysteriously been introduced around a what looks like a meta tag also contains another prior tag-looking run of text. * This implementation processes rel=preload and speculative loads that are seen before <meta charset> has been seen. WebKit and Blink instead first look for the meta and rewind before starting speculative parsing. * Unlike WebKit, if there is neither an honored meta nor syntax resembling an XML declaration, detection from content takes place (as in Blink). * Unlike Blink, if there is neither an honored meta nor syntax resembling an XML declaration, the detection from content is not dependent of network buffer boundaries. * Unlike Blink, detection from content can trigger a reload at the end of the stream if the guess made at that point differs from the first guess. (See below for the definition of the input to the first guess.) Differences from the old spec and Gecko previously: * Meta inside script and RCDATA elements is no longer honored. * Late meta is now ignored and no longer triggers a reload. 
* Later meta counts as early enough meta: In addition to the previous meta within the first 1024 bytes, now a meta that started within the first 1024 bytes counts as early enough. Additionally, if by then there hasn't been a template start tag and head hasn't ended, meta occurring before the earlier of the end of the head or a template start tag counts as early enough. * Meta now counts as not-late even if the encoding label has numeric character reference escapes. * Syntax resembling an XML declaration longer than a kilobyte is honored if there is no honored meta. * If there is neither an honored meta nor syntax resembling an XML declaration, the initial chardetng scan is potentially longer than before: the first 1024 bytes, the token spanning the 1024-byte boundary if there is such a token, and, if by then head hasn't ended and there hasn't been a template start tag until the end of the template start tag or the end of the token that causes head to end, ever comes first. However, if the token implying the end of the head is a text token, bytes only to the end of the previous non-text token is considered. (This definition avoids depending on network buffer boundaries.) * XML View Source now uses the code for syntax resembling an XML declaration instead of expat for extracting the internal encoding label. Reftest are added as both WPT and Gecko reftests in order to test both http: and file: URL scenarios. The Gecko tests retain the WPT <link> tags in order to use the exact same bytes. An encoding declaration has been added to a number of old tests that didn't intend to test the new speculation behavior especially in the context of https://bugzilla.mozilla.org/show_bug.cgi?id=1727750 . Differential Revision: https://phabricator.services.mozilla.com/D125808
2021-12-08 14:34:20 +03:00
void ComplainAboutBogusProtocolCharset(mozilla::dom::Document* aDoc,
bool aUnrecognized);
void MaybeComplainAboutDeepTree(uint32_t aLineNumber);
bool HasStarted() { return mStarted; }
bool IsFlushing() { return mFlushState >= eInFlush; }
#ifdef DEBUG
bool IsInFlushLoop() { return mRunFlushLoopOnStack; }
#endif
void RunScript(nsIContent* aScriptElement);
/**
* Flush the operations from the tree operations from the argument
* queue unconditionally. (This is for the main thread case.)
*/
virtual void MoveOpsFrom(nsTArray<nsHtml5TreeOperation>& aOpQueue) override;
void ClearOpQueue();
void RemoveFromStartOfOpQueue(size_t aNumberOfOpsToRemove);
inline size_t OpQueueLength() { return mOpQueue.Length(); }
nsHtml5TreeOpStage* GetStage() { return &mStage; }
void StartReadingFromStage() { mReadingFromStage = true; }
void StreamEnded();
#ifdef DEBUG
void AssertStageEmpty() { mStage.AssertEmpty(); }
#endif
nsIURI* GetViewSourceBaseURI();
void PreloadScript(const nsAString& aURL, const nsAString& aCharset,
const nsAString& aType, const nsAString& aCrossOrigin,
const nsAString& aMedia, const nsAString& aIntegrity,
ReferrerPolicy aReferrerPolicy, bool aScriptFromHead,
bool aAsync, bool aDefer, bool aNoModule,
bool aLinkPreload);
void PreloadStyle(const nsAString& aURL, const nsAString& aCharset,
const nsAString& aCrossOrigin, const nsAString& aMedia,
const nsAString& aReferrerPolicy,
const nsAString& aIntegrity, bool aLinkPreload);
void PreloadImage(const nsAString& aURL, const nsAString& aCrossOrigin,
const nsAString& aMedia, const nsAString& aSrcset,
const nsAString& aSizes,
const nsAString& aImageReferrerPolicy, bool aLinkPreload,
const mozilla::TimeStamp& aInitTimestamp);
void PreloadOpenPicture();
void PreloadEndPicture();
void PreloadPictureSource(const nsAString& aSrcset, const nsAString& aSizes,
const nsAString& aType, const nsAString& aMedia);
void PreloadFont(const nsAString& aURL, const nsAString& aCrossOrigin,
const nsAString& aMedia, const nsAString& aReferrerPolicy);
void PreloadFetch(const nsAString& aURL, const nsAString& aCrossOrigin,
const nsAString& aMedia, const nsAString& aReferrerPolicy);
void SetSpeculationBase(const nsAString& aURL);
void UpdateReferrerInfoFromMeta(const nsAString& aMetaReferrer);
void AddSpeculationCSP(const nsAString& aCSP);
void AddBase(const nsAString& aURL);
private:
nsHtml5Parser* GetParser();
bool IsExternalViewSource();
/**
* Get a nsIURI for an nsString if the URL hasn't been preloaded yet.
*/
already_AddRefed<nsIURI> ConvertIfNotPreloadedYet(const nsAString& aURL);
/**
* The above, plus also checks that the media attribute applies.
*/
already_AddRefed<nsIURI> ConvertIfNotPreloadedYetAndMediaApplies(
const nsAString& aURL, const nsAString& aMedia);
/** Returns whether the given media attribute applies to mDocument */
bool MediaApplies(const nsAString& aMedia);
/**
* The base URI we would use for current preload operations
*/
nsIURI* BaseURIForPreload();
/**
* Returns true if we haven't preloaded this URI yet, and adds it to the
* list of preloaded URIs
*/
bool ShouldPreloadURI(nsIURI* aURI);
ReferrerPolicy GetPreloadReferrerPolicy(const nsAString& aReferrerPolicy);
ReferrerPolicy GetPreloadReferrerPolicy(ReferrerPolicy aReferrerPolicy);
};
#endif // nsHtml5TreeOpExecutor_h