Backed out changeset 3dfd3c94a105 (bug 1701828) for causing mochitest failures on browser_hsts_host.js CLOSED TREE

This commit is contained in:
Norisz Fay 2021-12-07 12:05:44 +02:00
Родитель d95b521bae
Коммит 1d6984bc21
209 изменённых файлов: 2641 добавлений и 18638 удалений

4
Cargo.lock сгенерированный
Просмотреть файл

@ -5822,9 +5822,9 @@ checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"
[[package]]
name = "xmldecl"
version = "0.2.0"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efeb408acbc94f7459f1a3ee3620c108ebea5e5baf93a4641c07d57f59f5ffd1"
checksum = "e8c855a4ffa155714197d09677f21262f1d2e8d553fe5356c1752ad61f8a868c"
dependencies = [
"encoding_rs",
]

Просмотреть файл

@ -1596,6 +1596,7 @@ nsDocShell::ForceEncodingDetection() {
}
break;
case kCharsetFromXmlDeclaration:
case kCharsetFromMetaPrescan:
case kCharsetFromMetaTag:
if (isFileURL) {
LOGCHARSETMENU(("LocalLabeled"));

Просмотреть файл

@ -6798,14 +6798,11 @@ void Document::TryChannelCharset(nsIChannel* aChannel, int32_t& aCharsetSource,
if (NS_SUCCEEDED(rv)) {
const Encoding* preferred = Encoding::ForLabel(charsetVal);
if (preferred) {
if (aExecutor && preferred == REPLACEMENT_ENCODING) {
aExecutor->ComplainAboutBogusProtocolCharset(this, false);
}
aEncoding = WrapNotNull(preferred);
aCharsetSource = kCharsetFromChannel;
return;
} else if (aExecutor && !charsetVal.IsEmpty()) {
aExecutor->ComplainAboutBogusProtocolCharset(this, true);
aExecutor->ComplainAboutBogusProtocolCharset(this);
}
}
}

Просмотреть файл

@ -690,6 +690,7 @@ bool nsHTMLDocument::WillIgnoreCharsetOverride() {
case kCharsetFromFinalAutoDetectionWouldNotHaveBeenUTF8DependedOnTLD:
case kCharsetFromParentFrame:
case kCharsetFromXmlDeclaration:
case kCharsetFromMetaPrescan:
case kCharsetFromMetaTag:
case kCharsetFromChannel:
return false;

Просмотреть файл

@ -4,27 +4,16 @@
# Encoding warnings and errors
EncNoDeclarationFrame=The character encoding of a framed document was not declared. The document may appear different if viewed without the document framing it.
EncXmlDecl=The character encoding of an HTML document was declared using the XML declaration syntax. This is non-conforming, and declaring the encoding using a meta tag at the start of the head part is more efficient.
EncMetaTooLate=A meta tag attempting to declare the character encoding declaration was found too late, and the encoding was guessed from content instead. The meta tag needs to be moved to the start of the head part of the document.
EncMetaTooLateFrame=A meta tag attempting to declare the character encoding declaration was found too late, and the encoding of the parent document was used instead. The meta tag needs to be moved to the start of the head part of the document.
EncMetaAfterHeadInKilobyte=The meta tag declaring the character encoding of the document should be moved to start of the head part of the document.
EncNoDecl=The character encoding of the document was not declared, so the encoding was guessed from content. The character encoding needs to be declared in the Content-Type HTTP header, using a meta tag, or using a byte order mark.
EncNoDeclPlain=The character encoding of the document was not declared, so the encoding was guessed from content. The character encoding needs to be declared in the Content-Type HTTP header or using a byte order mark.
EncNoDeclarationPlain=The character encoding of the plain text document was not declared. The document will render with garbled text in some browser configurations if the document contains characters from outside the US-ASCII range. The character encoding of the file needs to be declared in the transfer protocol or file needs to use a byte order mark as an encoding signature.
EncNoDeclaration=The character encoding of the HTML document was not declared. The document will render with garbled text in some browser configurations if the document contains characters from outside the US-ASCII range. The character encoding of the page must be declared in the document or in the transfer protocol.
EncLateMetaFrame=The character encoding declaration of the framed HTML document was not found when prescanning the first 1024 bytes of the file. When viewed without the document framing it, the page will reload automatically. The encoding declaration needs to be moved to be within the first 1024 bytes of the file.
EncLateMeta=The character encoding declaration of the HTML document was not found when prescanning the first 1024 bytes of the file. When viewed in a differently-configured browser, this page will reload automatically. The encoding declaration needs to be moved to be within the first 1024 bytes of the file.
EncLateMetaReload=The page was reloaded, because the character encoding declaration of the HTML document was not found when prescanning the first 1024 bytes of the file. The encoding declaration needs to be moved to be within the first 1024 bytes of the file.
EncLateMetaTooLate=The character encoding declaration of document was found too late for it to take effect. The encoding declaration needs to be moved to be within the first 1024 bytes of the file.
EncMetaUnsupported=An unsupported character encoding was declared for the HTML document using a meta tag. The declaration was ignored.
EncProtocolUnsupported=An unsupported character encoding was declared on the transfer protocol level. The declaration was ignored.
EncMetaUtf16=A meta tag was used to declare the character encoding as UTF-16. This was interpreted as an UTF-8 declaration instead.
EncMetaUserDefined=A meta tag was used to declare the character encoding as x-user-defined. This was interpreted as a windows-1252 declaration instead for compatibility with intentionally mis-encoded legacy fonts. This site should migrate to Unicode.
EncMetaReplacement=A meta tag was used to declare an encoding that is a cross-site scripting hazard. The replacement encoding was used instead.
EncProtocolReplacement=An encoding that is a cross-site scripting hazard was declared on the transfer protocol level. The replacement encoding was used instead.
EncDetectorReload=The character encoding of the document was not declared, and the encoding was guessable from content only late. This caused the document to be reloaded. The character encoding needs to be declared in the Content-Type HTTP header, using a meta tag, or using a byte order mark.
EncDetectorReloadPlain=The character encoding of the document was not declared, and the encoding was guessable from content only late. This caused the document to be reloaded. The character encoding needs to be declared in the Content-Type HTTP header or using a byte order mark.
EncError=The byte stream was erroneous according to the character encoding that was declared. The character encoding declaration may be incorrect.
EncErrorFrame=The byte stream was erroneous according to the character encoding that was inherited from the parent document. The character encoding needs to be declared in the Content-Type HTTP header, using a meta tag, or using a byte order mark.
EncErrorFramePlain=The byte stream was erroneous according to the character encoding that was inherited from the parent document. The character encoding needs to be declared in the Content-Type HTTP header or using a byte order mark.
EncSpeculationFailMeta=The start of the document was reparsed, because there were non-ASCII characters before the meta tag that declared the encoding. The meta should be the first child of head without non-ASCII comments before.
EncSpeculationFailXml=The start of the document was reparsed, because there were non-ASCII characters in the part of the document that was unsuccessfully searched for a meta tag before falling back to the XML declaration syntax. A meta tag at the start of the head part should be used instead of the XML declaration syntax.
# The audience of the following message isn't the author of the document but other people debugging browser behavior.
EncSpeculationFail2022=The start of the document was reparsed, because ISO-2022-JP is an ASCII-incompatible encoding.
# The bulk of the messages below are derived from
# https://hg.mozilla.org/projects/htmlparser/file/1f633cef7de7/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java

Просмотреть файл

@ -1,7 +1,6 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Document</title>
<link rel="stylesheet" href="https://example.com/tests/dom/security/test/general/file_cache_splitting_server.sjs">
</head>

Просмотреть файл

@ -2,7 +2,6 @@
<html>
<head>
<meta charset="utf-8">
<title>Bug 1454721 - Add same-site cookie test for about:blank and about:srcdoc</title>
<script src="/tests/SimpleTest/SimpleTest.js"></script>
<script src="/tests/SimpleTest/ChromeTask.js"></script>

Просмотреть файл

@ -6,7 +6,6 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" type="text/css" href="http://mochi.test:8888/tests/SimpleTest/test.css?resource-timing-nocors"/>
<!--

Просмотреть файл

@ -6,7 +6,6 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="/tests/SimpleTest/test.css?performance-timeline-main-test"/>
<script type="application/javascript">

Просмотреть файл

@ -45,7 +45,6 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="/tests/SimpleTest/test.css?resource-timing-main-test"/>
<script type="application/javascript">

Просмотреть файл

@ -12,4 +12,4 @@ simd-accel = ["encoding_rs/simd-accel"]
encoding_rs = "0.8.0"
nsstring = { path = "../../xpcom/rust/nsstring" }
nserror = { path = "../../xpcom/rust/nserror" }
xmldecl = "0.2.0"
xmldecl = "0.1.1"

Просмотреть файл

@ -2585,12 +2585,6 @@ nsDocumentViewer::GetReloadEncodingAndSource(int32_t* aSource) {
NS_IMETHODIMP_(void)
nsDocumentViewer::SetReloadEncodingAndSource(const Encoding* aEncoding,
int32_t aSource) {
MOZ_ASSERT(
aSource == kCharsetUninitialized ||
(aSource >= kCharsetFromFinalAutoDetectionWouldHaveBeenUTF8 &&
aSource <=
kCharsetFromFinalAutoDetectionWouldNotHaveBeenUTF8DependedOnTLD) ||
aSource == kCharsetFromFinalUserForcedAutoDetection);
mReloadEncoding = aEncoding;
mReloadEncodingSource = aSource;
}

Просмотреть файл

@ -1,5 +1,4 @@
<!doctype html>
<meta charset="utf-8">
<script src="/tests/SimpleTest/SimpleTest.js"></script>
<link rel="stylesheet" href="/tests/SimpleTest/test.css"/>
<script src="slow_load.sjs"></script>

Просмотреть файл

@ -0,0 +1,850 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2015 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.impl;
import java.io.IOException;
import org.xml.sax.SAXException;
import nu.validator.htmlparser.annotation.Auto;
import nu.validator.htmlparser.annotation.Inline;
import nu.validator.htmlparser.common.ByteReadable;
public abstract class MetaScanner {
/**
* The tail of the attribute name "charset": the leading 'c' is not part of
* this array because the first character of an attribute name is matched by
* the state machine itself before indexing into this array starts at 0.
*/
private static final char[] CHARSET = { 'h', 'a', 'r', 's', 'e', 't' };
/**
* The tail of the attribute name "content" (leading 'c' omitted, see
* CHARSET).
*/
private static final char[] CONTENT = { 'o', 'n', 't', 'e', 'n', 't' };
/**
* The tail of the attribute name "http-equiv" (leading 'h' omitted, see
* CHARSET).
*/
private static final char[] HTTP_EQUIV = { 't', 't', 'p', '-', 'e', 'q',
'u', 'i', 'v' };
/**
* The complete attribute value "content-type". Unlike the attribute-name
* constants above, this one includes its first character, because value
* matching starts at index 0 with the first character of the value.
*/
private static final char[] CONTENT_TYPE = { 'c', 'o', 'n', 't', 'e', 'n',
't', '-', 't', 'y', 'p', 'e' };
// Values of metaState: how much of the tag name "meta" has been matched so
// far. NO means the current tag name is not (or is no longer) a prefix of
// "meta"; A means all four letters have been seen.
private static final int NO = 0;
private static final int M = 1;
private static final int E = 2;
private static final int T = 3;
private static final int A = 4;
// States of the scanning state machine driven by stateLoop(). The value 12
// is not assigned to any state.
private static final int DATA = 0;
private static final int TAG_OPEN = 1;
private static final int SCAN_UNTIL_GT = 2;
private static final int TAG_NAME = 3;
private static final int BEFORE_ATTRIBUTE_NAME = 4;
private static final int ATTRIBUTE_NAME = 5;
private static final int AFTER_ATTRIBUTE_NAME = 6;
private static final int BEFORE_ATTRIBUTE_VALUE = 7;
private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8;
private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 9;
private static final int ATTRIBUTE_VALUE_UNQUOTED = 10;
private static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 11;
private static final int MARKUP_DECLARATION_OPEN = 13;
private static final int MARKUP_DECLARATION_HYPHEN = 14;
private static final int COMMENT_START = 15;
private static final int COMMENT_START_DASH = 16;
private static final int COMMENT = 17;
private static final int COMMENT_END_DASH = 18;
private static final int COMMENT_END = 19;
private static final int SELF_CLOSING_START_TAG = 20;
// Values of httpEquivState: whether the current tag has had an http-equiv
// attribute value handled yet and, if so, whether that value matched
// "content-type".
private static final int HTTP_EQUIV_NOT_SEEN = 0;
private static final int HTTP_EQUIV_CONTENT_TYPE = 1;
private static final int HTTP_EQUIV_OTHER = 2;
/**
* The data source.
*/
protected ByteReadable readable;
/**
* The state of the state machine that recognizes the tag name "meta"
* (one of NO, M, E, T, A).
*/
private int metaState = NO;
/**
* The current position in recognizing the attribute name "content".
* Integer.MAX_VALUE means the current attribute name cannot be "content".
*/
private int contentIndex = Integer.MAX_VALUE;
/**
* The current position in recognizing the attribute name "charset".
* Integer.MAX_VALUE means the current attribute name cannot be "charset".
*/
private int charsetIndex = Integer.MAX_VALUE;
/**
* The current position in recognizing the attribute name "http-equiv".
* Integer.MAX_VALUE means the current attribute name cannot be
* "http-equiv".
*/
private int httpEquivIndex = Integer.MAX_VALUE;
/**
* The current position in recognizing the attribute value "content-type".
* Integer.MAX_VALUE means the current value cannot be "content-type".
*/
private int contentTypeIndex = Integer.MAX_VALUE;
/**
* The tokenizer state. stateLoop() stores the state it was in here when
* it exits.
*/
protected int stateSave = DATA;
/**
* The currently filled length of strBuf.
*/
private int strBufLen;
/**
* Accumulation buffer for attribute values.
*/
private @Auto char[] strBuf;
// Value of the first "content" attribute handled on the current tag, or
// null if none has been handled; cleared by handleTag().
private String content;
// Value of the first "charset" attribute handled on the current tag, or
// null if none has been handled; cleared by handleTag().
private String charset;
// One of the HTTP_EQUIV_* constants; reset to HTTP_EQUIV_NOT_SEEN by
// handleTag().
private int httpEquivState;
// NOTE(review): the CPPONLY line below looks like a directive for the
// Java-to-C++ translation of this file -- keep it verbatim.
// CPPONLY: private TreeBuilder treeBuilder;
/**
* Creates a scanner with no data source, positioned in the DATA state,
* with "meta" tag-name matching at NO, every attribute name/value matching
* index disabled (Integer.MAX_VALUE), no captured content or charset
* value, and an empty 36-element accumulation buffer.
*
* NOTE(review): the CPPONLY comments in the parameter list and body appear
* to be directives for the C++ translation of this class; keep them
* byte-for-byte.
*/
public MetaScanner(
// CPPONLY: TreeBuilder tb
) {
this.readable = null;
this.metaState = NO;
this.contentIndex = Integer.MAX_VALUE;
this.charsetIndex = Integer.MAX_VALUE;
this.httpEquivIndex = Integer.MAX_VALUE;
this.contentTypeIndex = Integer.MAX_VALUE;
this.stateSave = DATA;
this.strBufLen = 0;
this.strBuf = new char[36];
this.content = null;
this.charset = null;
this.httpEquivState = HTTP_EQUIV_NOT_SEEN;
// CPPONLY: this.treeBuilder = tb;
// CPPONLY: this.mEncoding = null;
}
/**
 * Hands the captured "charset" and "content" attribute strings to
 * Portability.releaseString. Nothing in this class calls this method,
 * hence the suppressed "unused" warning.
 */
@SuppressWarnings("unused") private void destructor() {
    Portability.releaseString(this.charset);
    Portability.releaseString(this.content);
}
// [NOCPP[
/**
 * Fetches the next byte from the data source held in {@code readable}.
 *
 * @return the value produced by {@code readable.readByte()}; -1 signals
 *         the end of the input
 * @throws IOException
 *             if the underlying data source fails
 */
protected int read() throws IOException {
    int next = this.readable.readByte();
    return next;
}
// ]NOCPP]
// WARNING When editing this, make sure the bytecode length shown by javap
// stays under 8000 bytes!
/**
* Runs the meta scanning algorithm.
*
* Repeatedly obtains input via read() and steps the scanning state machine,
* starting from the given state. The loop ends when read() returns -1 or
* when handleTag() returns true. On exit the current state is written to
* stateSave (presumably so that scanning can be resumed later from the
* same state).
*
* Control flow convention used throughout: "break <innerloop>" right before
* the next case label falls through into that state, while
* "continue stateloop" re-dispatches on the updated state variable.
*
* @param state
*            the state to start in (one of the state constants of this
*            class)
* @throws SAXException
*             declared because the attribute and tag handlers called from
*             here declare it
* @throws IOException
*             declared because read() declares it
*/
protected final void stateLoop(int state)
throws SAXException, IOException {
// c is the most recently read input unit; when reconsume is set, the next
// state that honors it processes c again instead of calling read().
int c = -1;
boolean reconsume = false;
stateloop: for (;;) {
switch (state) {
// Skip input until a '<' opens a potential tag.
case DATA:
dataloop: for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1:
break stateloop;
case '<':
state = MetaScanner.TAG_OPEN;
break dataloop; // FALL THROUGH continue
// stateloop;
default:
continue;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
// Classify what follows '<': a tag name (starting "meta" recognition when
// it begins with 'm'/'M'), "<!", "<?" or "</", an immediate '>', or
// something that is not a tag at all.
case TAG_OPEN:
tagopenloop: for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case 'm':
case 'M':
metaState = M;
state = MetaScanner.TAG_NAME;
break tagopenloop;
// continue stateloop;
case '!':
state = MetaScanner.MARKUP_DECLARATION_OPEN;
continue stateloop;
case '?':
case '/':
state = MetaScanner.SCAN_UNTIL_GT;
continue stateloop;
case '>':
state = MetaScanner.DATA;
continue stateloop;
default:
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
metaState = NO;
state = MetaScanner.TAG_NAME;
break tagopenloop;
// continue stateloop;
}
// Not a tag: hand the character back to the DATA state.
state = MetaScanner.DATA;
reconsume = true;
continue stateloop;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
// Consume the tag name, advancing metaState M -> E -> T -> A only while the
// name spells "meta" (ASCII case-insensitively); any other character in the
// name resets metaState to NO.
case TAG_NAME:
tagnameloop: for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case ' ':
case '\t':
case '\n':
case '\u000C':
state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
break tagnameloop;
// continue stateloop;
case '/':
state = MetaScanner.SELF_CLOSING_START_TAG;
continue stateloop;
case '>':
state = MetaScanner.DATA;
continue stateloop;
case 'e':
case 'E':
if (metaState == M) {
metaState = E;
} else {
metaState = NO;
}
continue;
case 't':
case 'T':
if (metaState == E) {
metaState = T;
} else {
metaState = NO;
}
continue;
case 'a':
case 'A':
if (metaState == T) {
metaState = A;
} else {
metaState = NO;
}
continue;
default:
metaState = NO;
continue;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
// Skip whitespace before an attribute name. The first character of the name
// decides which tracked names can still match: 'c' enables "content" and
// "charset", 'h' enables "http-equiv", anything else disables all of them.
// NOTE(review): '\r' is not among the whitespace cases in any state of this
// method -- confirm whether that is intended for the input this scanner
// sees.
case BEFORE_ATTRIBUTE_NAME:
beforeattributenameloop: for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
/*
* Consume the next input character:
*/
switch (c) {
case -1:
break stateloop;
case ' ':
case '\t':
case '\n':
case '\u000C':
continue;
case '/':
state = MetaScanner.SELF_CLOSING_START_TAG;
continue stateloop;
case '>':
if (handleTag()) {
break stateloop;
}
state = DATA;
continue stateloop;
case 'c':
case 'C':
contentIndex = 0;
charsetIndex = 0;
httpEquivIndex = Integer.MAX_VALUE;
contentTypeIndex = Integer.MAX_VALUE;
state = MetaScanner.ATTRIBUTE_NAME;
break beforeattributenameloop;
case 'h':
case 'H':
contentIndex = Integer.MAX_VALUE;
charsetIndex = Integer.MAX_VALUE;
httpEquivIndex = 0;
contentTypeIndex = Integer.MAX_VALUE;
state = MetaScanner.ATTRIBUTE_NAME;
break beforeattributenameloop;
default:
contentIndex = Integer.MAX_VALUE;
charsetIndex = Integer.MAX_VALUE;
httpEquivIndex = Integer.MAX_VALUE;
contentTypeIndex = Integer.MAX_VALUE;
state = MetaScanner.ATTRIBUTE_NAME;
break beforeattributenameloop;
// continue stateloop;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
// Consume the rest of an attribute name, matching it against the tails of
// the tracked names.
case ATTRIBUTE_NAME:
attributenameloop: for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case ' ':
case '\t':
case '\n':
case '\u000C':
state = MetaScanner.AFTER_ATTRIBUTE_NAME;
continue stateloop;
case '/':
state = MetaScanner.SELF_CLOSING_START_TAG;
continue stateloop;
case '=':
// A value follows: empty the accumulation buffer and restart
// "content-type" value matching.
strBufLen = 0;
contentTypeIndex = 0;
state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
break attributenameloop;
// continue stateloop;
case '>':
if (handleTag()) {
break stateloop;
}
state = MetaScanner.DATA;
continue stateloop;
default:
// Names are only tracked inside a meta tag. Each index advances on a
// matching (lower-cased) character and becomes Integer.MAX_VALUE on the
// first mismatch or on any character past the end of the tracked name.
if (metaState == A) {
if (c >= 'A' && c <= 'Z') {
c += 0x20;
}
if (contentIndex < CONTENT.length && c == CONTENT[contentIndex]) {
++contentIndex;
} else {
contentIndex = Integer.MAX_VALUE;
}
if (charsetIndex < CHARSET.length && c == CHARSET[charsetIndex]) {
++charsetIndex;
} else {
charsetIndex = Integer.MAX_VALUE;
}
if (httpEquivIndex < HTTP_EQUIV.length && c == HTTP_EQUIV[httpEquivIndex]) {
++httpEquivIndex;
} else {
httpEquivIndex = Integer.MAX_VALUE;
}
}
continue;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
// Skip whitespace after '=' and pick the quoted or unquoted value state.
case BEFORE_ATTRIBUTE_VALUE:
beforeattributevalueloop: for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case ' ':
case '\t':
case '\n':
case '\u000C':
continue;
case '"':
state = MetaScanner.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
break beforeattributevalueloop;
// continue stateloop;
case '\'':
state = MetaScanner.ATTRIBUTE_VALUE_SINGLE_QUOTED;
continue stateloop;
case '>':
if (handleTag()) {
break stateloop;
}
state = MetaScanner.DATA;
continue stateloop;
default:
// First character of an unquoted value.
handleCharInAttributeValue(c);
state = MetaScanner.ATTRIBUTE_VALUE_UNQUOTED;
continue stateloop;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
// Feed value characters to handleCharInAttributeValue() until the closing
// double quote, then record the value.
case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
attributevaluedoublequotedloop: for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1:
break stateloop;
case '"':
handleAttributeValue();
state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
break attributevaluedoublequotedloop;
// continue stateloop;
default:
handleCharInAttributeValue(c);
continue;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
case AFTER_ATTRIBUTE_VALUE_QUOTED:
afterattributevaluequotedloop: for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case ' ':
case '\t':
case '\n':
case '\u000C':
state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
continue stateloop;
case '/':
state = MetaScanner.SELF_CLOSING_START_TAG;
break afterattributevaluequotedloop;
// continue stateloop;
case '>':
if (handleTag()) {
break stateloop;
}
state = MetaScanner.DATA;
continue stateloop;
default:
// No whitespace between attributes: treat the character as the start
// of the next attribute name.
state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
reconsume = true;
continue stateloop;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
// After '/': a '>' ends the tag; anything else is reprocessed as the start
// of an attribute name.
case SELF_CLOSING_START_TAG:
c = read();
switch (c) {
case -1:
break stateloop;
case '>':
if (handleTag()) {
break stateloop;
}
state = MetaScanner.DATA;
continue stateloop;
default:
state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
reconsume = true;
continue stateloop;
}
// An unquoted value ends at whitespace or '>'.
case ATTRIBUTE_VALUE_UNQUOTED:
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1:
break stateloop;
case ' ':
case '\t':
case '\n':
case '\u000C':
handleAttributeValue();
state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
continue stateloop;
case '>':
handleAttributeValue();
if (handleTag()) {
break stateloop;
}
state = MetaScanner.DATA;
continue stateloop;
default:
handleCharInAttributeValue(c);
continue;
}
}
// Whitespace has followed an attribute name; either '=' introduces its
// value or another attribute name (or the end of the tag) follows.
// NOTE(review): unlike BEFORE_ATTRIBUTE_NAME, this state has no 'h'/'H'
// case and its 'c' and default cases leave httpEquivIndex and
// contentTypeIndex untouched -- confirm whether that difference is
// intentional.
// NOTE(review): the '/' and '>' cases call handleAttributeValue() for an
// attribute that had no value; strBufLen is not reset on this path, so the
// buffer may still hold a previous attribute's value -- confirm.
case AFTER_ATTRIBUTE_NAME:
for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case ' ':
case '\t':
case '\n':
case '\u000C':
continue;
case '/':
handleAttributeValue();
state = MetaScanner.SELF_CLOSING_START_TAG;
continue stateloop;
case '=':
strBufLen = 0;
contentTypeIndex = 0;
state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
continue stateloop;
case '>':
handleAttributeValue();
if (handleTag()) {
break stateloop;
}
state = MetaScanner.DATA;
continue stateloop;
case 'c':
case 'C':
contentIndex = 0;
charsetIndex = 0;
state = MetaScanner.ATTRIBUTE_NAME;
continue stateloop;
default:
contentIndex = Integer.MAX_VALUE;
charsetIndex = Integer.MAX_VALUE;
state = MetaScanner.ATTRIBUTE_NAME;
continue stateloop;
}
}
// After "<!": two hyphens start a comment; anything else is skipped up to
// the next '>'.
case MARKUP_DECLARATION_OPEN:
markupdeclarationopenloop: for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case '-':
state = MetaScanner.MARKUP_DECLARATION_HYPHEN;
break markupdeclarationopenloop;
// continue stateloop;
default:
state = MetaScanner.SCAN_UNTIL_GT;
reconsume = true;
continue stateloop;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
case MARKUP_DECLARATION_HYPHEN:
markupdeclarationhyphenloop: for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case '-':
state = MetaScanner.COMMENT_START;
break markupdeclarationhyphenloop;
// continue stateloop;
default:
state = MetaScanner.SCAN_UNTIL_GT;
reconsume = true;
continue stateloop;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
// Directly after "<!--": a '>' here already ends the comment.
case COMMENT_START:
commentstartloop: for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case '-':
state = MetaScanner.COMMENT_START_DASH;
continue stateloop;
case '>':
state = MetaScanner.DATA;
continue stateloop;
default:
state = MetaScanner.COMMENT;
break commentstartloop;
// continue stateloop;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
// Inside a comment: skip everything until a '-' that may begin "-->".
case COMMENT:
commentloop: for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case '-':
state = MetaScanner.COMMENT_END_DASH;
break commentloop;
// continue stateloop;
default:
continue;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
case COMMENT_END_DASH:
commentenddashloop: for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case '-':
state = MetaScanner.COMMENT_END;
break commentenddashloop;
// continue stateloop;
default:
state = MetaScanner.COMMENT;
continue stateloop;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
// After "--" inside a comment: '>' closes it, further '-' characters stay
// in this state, anything else goes back to COMMENT.
case COMMENT_END:
for (;;) {
c = read();
switch (c) {
case -1:
break stateloop;
case '>':
state = MetaScanner.DATA;
continue stateloop;
case '-':
continue;
default:
state = MetaScanner.COMMENT;
continue stateloop;
}
}
// After "<!---".
case COMMENT_START_DASH:
c = read();
switch (c) {
case -1:
break stateloop;
case '-':
state = MetaScanner.COMMENT_END;
continue stateloop;
case '>':
state = MetaScanner.DATA;
continue stateloop;
default:
state = MetaScanner.COMMENT;
continue stateloop;
}
// Same as the double-quoted value state, but terminated by a single quote.
case ATTRIBUTE_VALUE_SINGLE_QUOTED:
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1:
break stateloop;
case '\'':
handleAttributeValue();
state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
continue stateloop;
default:
handleCharInAttributeValue(c);
continue;
}
}
// Skip input up to and including the next '>'.
case SCAN_UNTIL_GT:
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1:
break stateloop;
case '>':
state = MetaScanner.DATA;
continue stateloop;
default:
continue;
}
}
}
}
// Reached only via "break stateloop": remember the state we stopped in.
stateSave = state;
}
/**
 * Processes one character of an attribute value.
 *
 * Outside a meta tag the character is ignored. Inside one, the character
 * is buffered when the current attribute name is "content" or "charset";
 * when the name is "http-equiv", the character is instead matched (ASCII
 * case-insensitively) against the next expected character of
 * "content-type".
 *
 * @param c
 *            the character read from the attribute value
 * @throws SAXException
 *             declared because addToBuffer declares it
 */
private void handleCharInAttributeValue(int c) throws SAXException {
    if (metaState != A) {
        return;
    }
    if (contentIndex == CONTENT.length || charsetIndex == CHARSET.length) {
        addToBuffer(c);
        return;
    }
    if (httpEquivIndex != HTTP_EQUIV.length) {
        return;
    }
    // The bounds check must come first: it guards the array access and
    // short-circuits the call to toAsciiLowerCase.
    boolean stillMatching = contentTypeIndex < CONTENT_TYPE.length
            && toAsciiLowerCase(c) == CONTENT_TYPE[contentTypeIndex];
    if (stillMatching) {
        ++contentTypeIndex;
    } else {
        contentTypeIndex = Integer.MAX_VALUE;
    }
}
/**
 * Maps the ASCII upper-case letters 'A'..'Z' to their lower-case
 * counterparts by adding 0x20; every other value is returned unchanged.
 *
 * @param c
 *            the character to convert
 * @return the lower-cased character, or c itself if it is not in 'A'..'Z'
 */
@Inline private int toAsciiLowerCase(int c) {
    boolean isAsciiUpper = c >= 'A' && c <= 'Z';
    return isAsciiUpper ? c + 0x20 : c;
}
/**
 * Appends one character to the accumulation buffer, first moving the
 * contents into a larger array when the buffer is completely full.
 *
 * @param c
 *            the character to append; it is narrowed to char when stored
 * @throws SAXException
 *             declared by the signature
 */
private void addToBuffer(int c) throws SAXException {
    if (strBufLen == strBuf.length) {
        // Full: the replacement array has length + (length << 1) elements,
        // with the sum computed by Portability.checkedAdd.
        int grownLength = Portability.checkedAdd(strBuf.length, (strBuf.length << 1));
        char[] grown = new char[grownLength];
        System.arraycopy(strBuf, 0, grown, 0, strBuf.length);
        strBuf = grown;
    }
    strBuf[strBufLen] = (char) c;
    strBufLen++;
}
/**
* Records the attribute value that has just ended, if the current tag is a
* meta tag. Does nothing otherwise. At most one of the following happens:
* <ul>
* <li>the attribute name was "content" and no content value has been stored
* for this tag yet: the accumulation buffer becomes <code>content</code>;</li>
* <li>the attribute name was "charset" and no charset value has been stored
* for this tag yet: the accumulation buffer becomes <code>charset</code>;</li>
* <li>the attribute name was "http-equiv" and none has been handled on this
* tag yet: <code>httpEquivState</code> records whether the value matched
* "content-type" in full.</li>
* </ul>
* This method returns nothing; the stored values are evaluated later by
* <code>handleTag()</code>.
*
* NOTE(review): the CPPONLY comments inside the argument lists appear to be
* directives for the C++ translation of this class; keep them verbatim.
*
* @throws SAXException declared by the signature
*/
private void handleAttributeValue() throws SAXException {
if (metaState != A) {
return;
}
if (contentIndex == CONTENT.length && content == null) {
content = Portability.newStringFromBuffer(strBuf, 0, strBufLen
// CPPONLY: , treeBuilder, false
);
return;
}
if (charsetIndex == CHARSET.length && charset == null) {
charset = Portability.newStringFromBuffer(strBuf, 0, strBufLen
// CPPONLY: , treeBuilder, false
);
return;
}
if (httpEquivIndex == HTTP_EQUIV.length
&& httpEquivState == HTTP_EQUIV_NOT_SEEN) {
// contentTypeIndex equals the full length only if every character of the
// value matched "content-type" and no extra character followed.
httpEquivState = (contentTypeIndex == CONTENT_TYPE.length) ? HTTP_EQUIV_CONTENT_TYPE
: HTTP_EQUIV_OTHER;
return;
}
}
/**
 * Called when the end of a tag has been reached. Evaluates the attribute
 * values collected for the tag via handleTagInner(), then clears the
 * per-tag state (content, charset, httpEquivState) regardless of the
 * outcome.
 *
 * @return the result of handleTagInner(); the state loop stops scanning
 *         when this is <code>true</code>
 * @throws SAXException
 *             declared because handleTagInner declares it
 */
private boolean handleTag() throws SAXException {
    boolean shouldStop = handleTagInner();
    // Reset everything that was captured for the tag that just ended.
    httpEquivState = HTTP_EQUIV_NOT_SEEN;
    Portability.releaseString(charset);
    charset = null;
    Portability.releaseString(content);
    content = null;
    return shouldStop;
}
/**
 * Decides whether the tag that just ended declares a usable encoding.
 *
 * A stored charset attribute value is tried first. If there is none, or
 * tryCharset rejects it, a stored content attribute value is considered,
 * but only when the tag also carried an http-equiv attribute whose value
 * matched "content-type"; the charset is then extracted from the content
 * value with TreeBuilder.extractCharsetFromContent and tried.
 *
 * @return <code>true</code> if tryCharset accepted an encoding
 * @throws SAXException
 *             declared because tryCharset declares it
 */
private boolean handleTagInner() throws SAXException {
    if (charset != null && tryCharset(charset)) {
        return true;
    }
    if (content == null || httpEquivState != HTTP_EQUIV_CONTENT_TYPE) {
        return false;
    }
    String declared = TreeBuilder.extractCharsetFromContent(content
    // CPPONLY: , treeBuilder
    );
    if (declared == null) {
        return false;
    }
    boolean accepted = tryCharset(declared);
    Portability.releaseString(declared);
    return accepted;
}
/**
* Tries to switch to an encoding. Implemented by subclasses.
*
* Within this class, a <code>true</code> result makes the state loop stop
* scanning; a <code>false</code> result lets scanning continue with the
* rest of the input.
*
* @param encoding
*            the encoding label taken from a meta tag: either the value of
*            its charset attribute or the charset extracted from its
*            content attribute
* @return <code>true</code> if successful
* @throws SAXException
*/
protected abstract boolean tryCharset(String encoding) throws SAXException;
}

Просмотреть файл

@ -365,8 +365,6 @@ public class Tokenizer implements Locator, Locator2 {
private boolean seenDigits;
private boolean suspendAfterCurrentNonTextToken;
protected int cstart;
/**
@ -545,7 +543,6 @@ public class Tokenizer implements Locator, Locator2 {
this.charRefBufMark = 0;
this.value = 0;
this.seenDigits = false;
this.suspendAfterCurrentNonTextToken = false;
this.cstart = 0;
this.strBufLen = 0;
this.newAttributesEachTime = newAttributesEachTime;
@ -604,7 +601,6 @@ public class Tokenizer implements Locator, Locator2 {
this.charRefBufMark = 0;
this.value = 0;
this.seenDigits = false;
this.suspendAfterCurrentNonTextToken = false;
this.cstart = 0;
this.strBufLen = 0;
// &CounterClockwiseContourIntegral; is the longest valid char ref and
@ -1090,16 +1086,6 @@ public class Tokenizer implements Locator, Locator2 {
/**
* Emits the current comment token.
*
* NOTE: The method may set <code>shouldSuspend</code>, so the caller
* must have this pattern after the state's <code>transition</code> call:
*
* <pre>
* if (shouldSuspend) {
* break stateloop;
* }
* continue stateloop;
* </pre>
*
* @param pos
* TODO
*
@ -1107,7 +1093,6 @@ public class Tokenizer implements Locator, Locator2 {
*/
private void emitComment(int provisionalHyphens, int pos)
throws SAXException {
// CPPONLY: RememberGt(pos);
// [NOCPP[
if (wantsComments) {
// ]NOCPP]
@ -1118,7 +1103,6 @@ public class Tokenizer implements Locator, Locator2 {
// ]NOCPP]
clearStrBufAfterUse();
cstart = pos + 1;
suspendIfRequestedAfterCurrentNonTextToken();
}
/**
@ -1229,26 +1213,8 @@ public class Tokenizer implements Locator, Locator2 {
clearStrBufAfterUse();
}
/**
* Emits a tag token.
*
* NOTE: The method may set <code>shouldSuspend</code>, so the caller
* must have this pattern after the state's <code>transition</code> call:
* <pre>
* if (shouldSuspend) {
* break stateloop;
* }
* continue stateloop;
* </pre>
*
* @param selfClosing
* @param pos
* @return
* @throws SAXException
*/
private int emitCurrentTagToken(boolean selfClosing, int pos)
throws SAXException {
// CPPONLY: RememberGt(pos);
cstart = pos + 1;
maybeErrSlashInEndTag(selfClosing);
stateSave = Tokenizer.DATA;
@ -1286,7 +1252,6 @@ public class Tokenizer implements Locator, Locator2 {
* The token handler may have called setStateAndEndTagExpectation
* and changed stateSave since the start of this method.
*/
suspendIfRequestedAfterCurrentNonTextToken();
return stateSave;
}
@ -2617,9 +2582,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '<':
appendStrBuf(c);
@ -2767,9 +2729,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '-':
/* U+002D HYPHEN-MINUS (-) Parse error. */
@ -2839,9 +2798,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '-':
/*
@ -2989,9 +2945,6 @@ public class Tokenizer implements Locator, Locator2 {
appendStrBuf(c);
emitComment(3, pos);
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '-':
errNestedComment();
@ -3057,9 +3010,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '<':
appendStrBuf(c);
@ -3181,16 +3131,6 @@ public class Tokenizer implements Locator, Locator2 {
case '>':
cstart = pos + 1;
state = transition(state, Tokenizer.DATA, reconsume, pos);
// Since a CDATA section starts with a less-than sign, it
// participates in the suspension-after-current-token
// behavior. (The suspension can be requested when the
// less-than sign has been seen but we don't yet know the
// resulting token type.) Therefore, we need to deal with
// a potential request here.
suspendIfRequestedAfterCurrentNonTextToken();
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
default:
tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
@ -4189,9 +4129,6 @@ public class Tokenizer implements Locator, Locator2 {
case '>':
emitComment(0, pos);
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '-':
appendStrBuf(c);
@ -4225,9 +4162,6 @@ public class Tokenizer implements Locator, Locator2 {
// ]NOCPP]
emitComment(0, pos);
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '-':
appendSecondHyphenToBogusComment();
@ -5029,9 +4963,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '\u0000':
c = '\uFFFD';
@ -5105,9 +5036,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '\u0000':
c = '\uFFFD';
@ -5171,9 +5099,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case 'p':
case 'P':
@ -5320,9 +5245,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
default:
bogusDoctype();
@ -5408,9 +5330,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
default:
bogusDoctype();
@ -5465,9 +5384,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
@ -5532,9 +5448,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '"':
/*
@ -5619,9 +5532,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '"':
/*
@ -5702,9 +5612,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
@ -5764,9 +5671,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
default:
/*
@ -5805,9 +5709,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '\r':
silentCarriageReturn();
@ -5938,9 +5839,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
default:
bogusDoctype();
@ -6026,9 +5924,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
default:
bogusDoctype();
@ -6079,9 +5974,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
@ -6140,9 +6032,6 @@ public class Tokenizer implements Locator, Locator2 {
* Switch to the data state.
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
@ -6195,23 +6084,6 @@ public class Tokenizer implements Locator, Locator2 {
case '>':
state = transition(state, Tokenizer.DATA,
reconsume, pos);
// Processing instruction syntax goes through these
// states only in Gecko's XML View Source--not in HTML
// parsing in Java or in Gecko.
// Since XML View Source doesn't use the
// suspension-after-current-token facility, its extension
// to processing instruction states is strictly unnecessary
// at the moment. However, if these states ever were to be
// used together with the suspension-after-current-token
// facility, these states would need to participate, since
// suspension could be requested when only less-than has been
// seen and we don't yet know if we end up here. Handling
// the currently unnecessary case in order to avoid leaving
// a trap for future modification.
suspendIfRequestedAfterCurrentNonTextToken();
if (shouldSuspend) {
break stateloop;
}
continue stateloop;
default:
state = transition(state,
@ -6947,23 +6819,7 @@ public class Tokenizer implements Locator, Locator2 {
return;
}
/**
* Emits a doctype token.
*
* NOTE: The method may set <code>shouldSuspend</code>, so the caller
* must have this pattern after the state's <code>transition</code> call:
* <pre>
* if (shouldSuspend) {
* break stateloop;
* }
* continue stateloop;
* </pre>
*
* @param pos
* @throws SAXException
*/
private void emitDoctypeToken(int pos) throws SAXException {
// CPPONLY: RememberGt(pos);
cstart = pos + 1;
tokenHandler.doctype(doctypeName, publicIdentifier, systemIdentifier,
forceQuirks);
@ -6975,135 +6831,6 @@ public class Tokenizer implements Locator, Locator2 {
publicIdentifier = null;
Portability.releaseString(systemIdentifier);
systemIdentifier = null;
suspendIfRequestedAfterCurrentNonTextToken();
}
/**
 * Promotes a deferred suspension request into a pending one.
 *
 * When <code>suspendAfterCurrentNonTextToken</code> is set, this clears it
 * and raises <code>shouldSuspend</code>. When it is not set, nothing
 * changes.
 */
private void suspendIfRequestedAfterCurrentNonTextToken() {
    if (!suspendAfterCurrentNonTextToken) {
        // No deferred request is outstanding.
        return;
    }
    shouldSuspend = true;
    suspendAfterCurrentNonTextToken = false;
}
// Making this private until the full Java implementation is done.
/**
 * Request suspension after the current token if the tokenizer is currently
 * in a non-text state (i.e. it's known that the next token will be a
 * non-text token).
 *
 * The decision is made from <code>stateSave</code> (and, for the character
 * reference states, from <code>returnStateSave</code>). When the saved state
 * is a text state the method returns without doing anything; otherwise it
 * sets <code>suspendAfterCurrentNonTextToken</code>.
 *
 * Must not be called when <code>tokenizeBuffer()</code> is on the call
 * stack.
 */
@SuppressWarnings("unused") private void suspendAfterCurrentTokenIfNotInText() {
switch (stateSave) {
// Group 1: states for which no suspension is requested; the method
// returns without touching suspendAfterCurrentNonTextToken.
case DATA:
case RCDATA:
case SCRIPT_DATA:
case RAWTEXT:
case SCRIPT_DATA_ESCAPED:
case PLAINTEXT:
case NON_DATA_END_TAG_NAME: // We haven't yet committed to the next
// token being a non-text token, though
// it could be.
case SCRIPT_DATA_LESS_THAN_SIGN:
case SCRIPT_DATA_ESCAPE_START:
case SCRIPT_DATA_ESCAPE_START_DASH:
case SCRIPT_DATA_ESCAPED_DASH:
case SCRIPT_DATA_ESCAPED_DASH_DASH:
case RAWTEXT_RCDATA_LESS_THAN_SIGN:
case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
case SCRIPT_DATA_DOUBLE_ESCAPE_START:
case SCRIPT_DATA_DOUBLE_ESCAPED:
case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
case SCRIPT_DATA_DOUBLE_ESCAPE_END:
return;
// Group 2: tag, comment, doctype, CDATA and processing instruction
// states. These break out of the switch so that the flag below is set.
case TAG_NAME:
case BEFORE_ATTRIBUTE_NAME:
case ATTRIBUTE_NAME:
case AFTER_ATTRIBUTE_NAME:
case BEFORE_ATTRIBUTE_VALUE:
case AFTER_ATTRIBUTE_VALUE_QUOTED:
case BOGUS_COMMENT:
case MARKUP_DECLARATION_OPEN:
case DOCTYPE:
case BEFORE_DOCTYPE_NAME:
case DOCTYPE_NAME:
case AFTER_DOCTYPE_NAME:
case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
case BOGUS_DOCTYPE:
case COMMENT_START:
case COMMENT_START_DASH:
case COMMENT:
case COMMENT_END_DASH:
case COMMENT_END:
case COMMENT_END_BANG:
case TAG_OPEN:
case CLOSE_TAG_OPEN:
case MARKUP_DECLARATION_HYPHEN:
case MARKUP_DECLARATION_OCTYPE:
case DOCTYPE_UBLIC:
case DOCTYPE_YSTEM:
case AFTER_DOCTYPE_PUBLIC_KEYWORD:
case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
case AFTER_DOCTYPE_SYSTEM_KEYWORD:
case SELF_CLOSING_START_TAG:
case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
case ATTRIBUTE_VALUE_SINGLE_QUOTED:
case ATTRIBUTE_VALUE_UNQUOTED:
case BOGUS_COMMENT_HYPHEN:
case COMMENT_LESSTHAN:
case COMMENT_LESSTHAN_BANG:
case COMMENT_LESSTHAN_BANG_DASH:
case COMMENT_LESSTHAN_BANG_DASH_DASH:
case CDATA_START:
case CDATA_SECTION:
case CDATA_RSQB:
case CDATA_RSQB_RSQB:
case PROCESSING_INSTRUCTION:
case PROCESSING_INSTRUCTION_QUESTION_MARK:
break;
// Group 3: character reference states. Whether a suspension is requested
// depends on the saved return state: DATA and RCDATA return without a
// request, any other return state falls through to set the flag.
case CONSUME_CHARACTER_REFERENCE:
case CONSUME_NCR:
case CHARACTER_REFERENCE_TAIL:
case HEX_NCR_LOOP:
case DECIMAL_NRC_LOOP:
case HANDLE_NCR_VALUE:
case HANDLE_NCR_VALUE_RECONSUME:
case CHARACTER_REFERENCE_HILO_LOOKUP:
if (returnStateSave == DATA || returnStateSave == RCDATA) {
return;
}
break;
default:
// A state missing from the lists above is treated as a programming
// error when assertions are enabled; otherwise no request is made.
assert false : "Incomplete switch";
return;
}
// Only reached via the "break" paths above.
suspendAfterCurrentNonTextToken = true;
}
// Making this private until the full Java implementation is done.
/**
 * Queries if we are about to suspend after the current non-text token due to a request
 * from <code>suspendAfterCurrentTokenIfNotInText()</code>.
 *
 * This is a plain read of <code>suspendAfterCurrentNonTextToken</code>; it
 * does not modify any state.
 *
 * @return <code>true</code> iff <code>suspendAfterCurrentTokenIfNotInText()</code> was
 * called in a non-text position and the then-current token has not been emitted yet.
 */
@SuppressWarnings("unused") private boolean suspensionAfterCurrentNonTextTokenPending() {
return suspendAfterCurrentNonTextToken;
}
@Inline protected char checkChar(@NoLength char[] buf, int pos)
@ -7232,7 +6959,6 @@ public class Tokenizer implements Locator, Locator2 {
charRefBufMark = 0;
value = 0;
seenDigits = false;
suspendAfterCurrentNonTextToken = false;
endTag = false;
shouldSuspend = false;
initDoctypeFields();
@ -7276,7 +7002,6 @@ public class Tokenizer implements Locator, Locator2 {
seenDigits = other.seenDigits;
endTag = other.endTag;
shouldSuspend = false;
suspendAfterCurrentNonTextToken = false;
doctypeName = other.doctypeName;
Portability.releaseString(systemIdentifier);

Просмотреть файл

@ -23,6 +23,8 @@ EXPORTS += [
"nsHtml5DocumentMode.h",
"nsHtml5HtmlAttributes.h",
"nsHtml5Macros.h",
"nsHtml5MetaScanner.h",
"nsHtml5MetaScannerHSupplement.h",
"nsHtml5Module.h",
"nsHtml5NamedCharacters.h",
"nsHtml5NamedCharactersAccel.h",
@ -58,6 +60,7 @@ UNIFIED_SOURCES += [
"nsHtml5ElementName.cpp",
"nsHtml5Highlighter.cpp",
"nsHtml5HtmlAttributes.cpp",
"nsHtml5MetaScanner.cpp",
"nsHtml5Module.cpp",
"nsHtml5NamedCharacters.cpp",
"nsHtml5NamedCharactersAccel.cpp",

Просмотреть файл

@ -46,6 +46,7 @@
#include "nsHtml5ElementName.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5StateSnapshot.h"

Просмотреть файл

@ -49,6 +49,7 @@ class nsHtml5StreamParser;
class nsHtml5ElementName;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;

Просмотреть файл

@ -46,6 +46,7 @@
#include "nsHtml5AttributeName.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5StateSnapshot.h"

Просмотреть файл

@ -49,6 +49,7 @@ class nsHtml5StreamParser;
class nsHtml5AttributeName;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;

Просмотреть файл

@ -66,30 +66,6 @@ nsHtml5Highlighter::~nsHtml5Highlighter() {
NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
}
// Replaces the op sink pointer held in mOpSink with aOpSink. Only the raw
// pointer is stored; nothing else is done with the previous or the new sink
// here.
void nsHtml5Highlighter::SetOpSink(nsAHtml5TreeOpSink* aOpSink) {
mOpSink = aOpSink;
}
// Puts the highlighter back into its initial scanning state: position and
// line counters, open-inline bookkeeping, the pending op queue and the
// cached node pointers are all reset, and the element stack is trimmed back
// to its first three entries.
void nsHtml5Highlighter::Rewind() {
  // Cached pointers into the current buffer / tree.
  mBuffer = nullptr;
  mCurrentRun = nullptr;
  mAmpersand = nullptr;
  mSlash = nullptr;

  // Scanner position and counters.
  mState = 0;
  mPos = 0;
  mCStart = INT32_MAX;
  mLineNumber = 1;
  mInlinesOpen = 0;

  // Flags.
  mInCharacters = false;
  mSeenBase = false;

  // Drop any operations queued so far.
  mOpQueue.Clear();

  // Unwind the stack until only html, body, and pre remain on it.
  while (3 < mStack.Length()) {
    Pop();
  }
}
void nsHtml5Highlighter::Start(const nsAutoString& aTitle) {
// Doctype
opAppendDoctypeToDocument operation(nsGkAtoms::html, u""_ns, u""_ns);
@ -138,12 +114,9 @@ void nsHtml5Highlighter::Start(const nsAutoString& aTitle) {
preAttrs->addAttribute(nsHtml5AttributeName::ATTR_ID, preId, -1);
Push(nsGkAtoms::pre, preAttrs, NS_NewHTMLPreElement);
// Don't call StartCharacters here in order to be able to put it in
// a speculation.
StartCharacters();
mOpQueue.AppendElement()->Init(mozilla::AsVariant(opStartLayout()));
FlushOps();
}
int32_t nsHtml5Highlighter::Transition(int32_t aState, bool aReconsume,

Просмотреть файл

@ -30,17 +30,6 @@ class nsHtml5Highlighter {
*/
~nsHtml5Highlighter();
/**
* Set the op sink (for speculation).
*/
void SetOpSink(nsAHtml5TreeOpSink* aOpSink);
/**
* Reset state to after generated head but before processing any of the input
* stream.
*/
void Rewind();
/**
* Starts the generated document.
*/
@ -157,11 +146,6 @@ class nsHtml5Highlighter {
*/
void AddBase(nsHtml5String aValue);
/**
* Starts a wrapper around a run of characters.
*/
void StartCharacters();
private:
/**
* Starts a span with no class.
@ -181,6 +165,11 @@ class nsHtml5Highlighter {
*/
void EndSpanOrA();
/**
* Starts a wrapper around a run of characters.
*/
void StartCharacters();
/**
* Ends a wrapper around a run of characters.
*/
@ -328,13 +317,7 @@ class nsHtml5Highlighter {
nsTArray<nsHtml5TreeOperation> mOpQueue;
/**
* The tree op stage for the tree op executor or a speculation when looking
* for meta charset.
*
* The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
* object, because this object is owned by the nsHtml5Tokenizer instance that
* is owned by the nsHtml5StreamParser, which keeps the executor alive via
* nsHtml5StreamParser::mExecutorFlusher.
* The tree op stage for the tree op executor.
*/
nsAHtml5TreeOpSink* mOpSink;

Просмотреть файл

@ -38,6 +38,7 @@
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5Portability.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5StateSnapshot.h"

Просмотреть файл

@ -43,6 +43,7 @@ class nsHtml5StreamParser;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5UTF16Buffer;

Просмотреть файл

@ -0,0 +1,821 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2015 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
* Please edit MetaScanner.java instead and regenerate.
*/
#define nsHtml5MetaScanner_cpp__
#include "nsAtom.h"
#include "nsHtml5AtomTable.h"
#include "nsHtml5String.h"
#include "nsNameSpaceManager.h"
#include "nsIContent.h"
#include "nsTraceRefcnt.h"
#include "jArray.h"
#include "nsHtml5ArrayCopy.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsGkAtoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsHtml5Macros.h"
#include "nsIContentHandle.h"
#include "nsHtml5Portability.h"
#include "nsHtml5ContentCreatorFunction.h"
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5StateSnapshot.h"
#include "nsHtml5Portability.h"
#include "nsHtml5MetaScanner.h"
// Keyword tables used by the state machine to recognize attribute names and
// the http-equiv value without building strings.
//
// CHARSET, CONTENT and HTTP_EQUIV deliberately omit their first letter: the
// first character of an attribute name ('c' or 'h') is consumed by the
// BEFORE_ATTRIBUTE_NAME state, which sets the matching index to 0, and the
// ATTRIBUTE_NAME state then compares the following characters against these
// tables starting at index 0.

// "charset" without the leading 'c'.
static char16_t const CHARSET_DATA[] = {'h', 'a', 'r', 's', 'e', 't'};
staticJArray<char16_t, int32_t> nsHtml5MetaScanner::CHARSET = {
CHARSET_DATA, MOZ_ARRAY_LENGTH(CHARSET_DATA)};
// "content" without the leading 'c'.
static char16_t const CONTENT_DATA[] = {'o', 'n', 't', 'e', 'n', 't'};
staticJArray<char16_t, int32_t> nsHtml5MetaScanner::CONTENT = {
CONTENT_DATA, MOZ_ARRAY_LENGTH(CONTENT_DATA)};
// "http-equiv" without the leading 'h'.
static char16_t const HTTP_EQUIV_DATA[] = {'t', 't', 'p', '-', 'e',
'q', 'u', 'i', 'v'};
staticJArray<char16_t, int32_t> nsHtml5MetaScanner::HTTP_EQUIV = {
HTTP_EQUIV_DATA, MOZ_ARRAY_LENGTH(HTTP_EQUIV_DATA)};
// The full string "content-type". Unlike the tables above it is matched
// against an attribute value from its first character (contentTypeIndex is
// reset to 0 when '=' is seen), so no letter is dropped.
static char16_t const CONTENT_TYPE_DATA[] = {'c', 'o', 'n', 't', 'e', 'n',
't', '-', 't', 'y', 'p', 'e'};
staticJArray<char16_t, int32_t> nsHtml5MetaScanner::CONTENT_TYPE = {
CONTENT_TYPE_DATA, MOZ_ARRAY_LENGTH(CONTENT_TYPE_DATA)};
// Constructs a scanner in its initial state.
//
// tb is stored in treeBuilder; within this file it is only passed on to
// string-creation and charset-extraction helpers.
//
// Initial state: no input (readable is null), tag-name matcher at NO, all
// keyword match indices at INT32_MAX (meaning "not matching"), the state
// machine parked in DATA, an empty 36-element value buffer, no captured
// content/charset strings, and no http-equiv attribute seen.
nsHtml5MetaScanner::nsHtml5MetaScanner(nsHtml5TreeBuilder* tb)
: readable(nullptr),
metaState(NO),
contentIndex(INT32_MAX),
charsetIndex(INT32_MAX),
httpEquivIndex(INT32_MAX),
contentTypeIndex(INT32_MAX),
stateSave(DATA),
strBufLen(0),
strBuf(jArray<char16_t, int32_t>::newJArray(36)),
content(nullptr),
charset(nullptr),
httpEquivState(HTTP_EQUIV_NOT_SEEN),
treeBuilder(tb),
mEncoding(nullptr) {
MOZ_COUNT_CTOR(nsHtml5MetaScanner);
}
// Destroys the scanner, releasing the captured content and charset attribute
// strings. handleTag() already releases them after every tag; this covers a
// scanner destroyed while values captured from an unfinished tag are still
// held.
nsHtml5MetaScanner::~nsHtml5MetaScanner() {
MOZ_COUNT_DTOR(nsHtml5MetaScanner);
content.Release();
charset.Release();
}
// Runs the scanner's state machine, starting in `state`.
//
// Input is pulled one unit at a time through read(). The loop ends when
// read() returns -1 or when handleTag() returns true; in both cases the
// state reached is stored in stateSave before returning.
//
// Control flow notes:
//  - NS_HTML5_BREAK(x) / NS_HTML5_CONTINUE(x) are used together with the
//    `x_end:` / `x:` labels below to leave an inner loop or restart the
//    outer loop.
//  - Several states end with [[fallthrough]]: leaving their inner loop via
//    NS_HTML5_BREAK continues directly into the next case, so the order of
//    the cases is significant.
//  - `reconsume` makes the next state look at the current `c` again instead
//    of calling read().
void nsHtml5MetaScanner::stateLoop(int32_t state) {
int32_t c = -1;
bool reconsume = false;
stateloop:
for (;;) {
switch (state) {
// DATA: skip input until a '<' is found.
case DATA: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '<': {
state = nsHtml5MetaScanner::TAG_OPEN;
NS_HTML5_BREAK(dataloop);
}
default: {
continue;
}
}
}
dataloop_end:;
[[fallthrough]];
}
// TAG_OPEN: classify the character after '<'. 'm'/'M' starts tracking a
// possible "meta" tag name, other ASCII letters start a tag name that is
// not tracked, '!' may open a comment, '?' and '/' are skipped up to '>',
// and anything else is reconsumed in DATA.
case TAG_OPEN: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case 'm':
case 'M': {
metaState = M;
state = nsHtml5MetaScanner::TAG_NAME;
NS_HTML5_BREAK(tagopenloop);
}
case '!': {
state = nsHtml5MetaScanner::MARKUP_DECLARATION_OPEN;
NS_HTML5_CONTINUE(stateloop);
}
case '\?':
case '/': {
state = nsHtml5MetaScanner::SCAN_UNTIL_GT;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
metaState = NO;
state = nsHtml5MetaScanner::TAG_NAME;
NS_HTML5_BREAK(tagopenloop);
}
state = nsHtml5MetaScanner::DATA;
reconsume = true;
NS_HTML5_CONTINUE(stateloop);
}
}
}
tagopenloop_end:;
[[fallthrough]];
}
// TAG_NAME: consume the rest of the tag name. metaState advances
// M -> E -> T -> A only while the letters spell "meta" (either case); any
// other character, or a letter out of sequence, resets it to NO.
case TAG_NAME: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_NAME;
NS_HTML5_BREAK(tagnameloop);
}
case '/': {
state = nsHtml5MetaScanner::SELF_CLOSING_START_TAG;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
case 'e':
case 'E': {
if (metaState == M) {
metaState = E;
} else {
metaState = NO;
}
continue;
}
case 't':
case 'T': {
if (metaState == E) {
metaState = T;
} else {
metaState = NO;
}
continue;
}
case 'a':
case 'A': {
if (metaState == T) {
metaState = A;
} else {
metaState = NO;
}
continue;
}
default: {
metaState = NO;
continue;
}
}
}
tagnameloop_end:;
[[fallthrough]];
}
// BEFORE_ATTRIBUTE_NAME: skip whitespace, then use the first character of
// the attribute name to arm the keyword matchers: 'c' may begin "content"
// or "charset", 'h' may begin "http-equiv", anything else arms none.
// A '>' here ends the tag and hands it to handleTag().
case BEFORE_ATTRIBUTE_NAME: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
continue;
}
case '/': {
state = nsHtml5MetaScanner::SELF_CLOSING_START_TAG;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = DATA;
NS_HTML5_CONTINUE(stateloop);
}
case 'c':
case 'C': {
contentIndex = 0;
charsetIndex = 0;
httpEquivIndex = INT32_MAX;
contentTypeIndex = INT32_MAX;
state = nsHtml5MetaScanner::ATTRIBUTE_NAME;
NS_HTML5_BREAK(beforeattributenameloop);
}
case 'h':
case 'H': {
contentIndex = INT32_MAX;
charsetIndex = INT32_MAX;
httpEquivIndex = 0;
contentTypeIndex = INT32_MAX;
state = nsHtml5MetaScanner::ATTRIBUTE_NAME;
NS_HTML5_BREAK(beforeattributenameloop);
}
default: {
contentIndex = INT32_MAX;
charsetIndex = INT32_MAX;
httpEquivIndex = INT32_MAX;
contentTypeIndex = INT32_MAX;
state = nsHtml5MetaScanner::ATTRIBUTE_NAME;
NS_HTML5_BREAK(beforeattributenameloop);
}
}
}
beforeattributenameloop_end:;
[[fallthrough]];
}
// ATTRIBUTE_NAME: consume the attribute name. Inside a meta tag each
// character is ASCII-lowercased and fed to the three keyword matchers; a
// matcher that sees a mismatch (or extra characters) is parked at
// INT32_MAX. '=' clears the value buffer and arms the "content-type"
// value matcher.
case ATTRIBUTE_NAME: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
state = nsHtml5MetaScanner::AFTER_ATTRIBUTE_NAME;
NS_HTML5_CONTINUE(stateloop);
}
case '/': {
state = nsHtml5MetaScanner::SELF_CLOSING_START_TAG;
NS_HTML5_CONTINUE(stateloop);
}
case '=': {
strBufLen = 0;
contentTypeIndex = 0;
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_VALUE;
NS_HTML5_BREAK(attributenameloop);
}
case '>': {
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
if (metaState == A) {
if (c >= 'A' && c <= 'Z') {
c += 0x20;
}
if (contentIndex < CONTENT.length &&
c == CONTENT[contentIndex]) {
++contentIndex;
} else {
contentIndex = INT32_MAX;
}
if (charsetIndex < CHARSET.length &&
c == CHARSET[charsetIndex]) {
++charsetIndex;
} else {
charsetIndex = INT32_MAX;
}
if (httpEquivIndex < HTTP_EQUIV.length &&
c == HTTP_EQUIV[httpEquivIndex]) {
++httpEquivIndex;
} else {
httpEquivIndex = INT32_MAX;
}
}
continue;
}
}
}
attributenameloop_end:;
[[fallthrough]];
}
// BEFORE_ATTRIBUTE_VALUE: skip whitespace after '=' and pick the quoted or
// unquoted value state. For an unquoted value the first character is
// already part of the value, so it is processed here.
case BEFORE_ATTRIBUTE_VALUE: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
continue;
}
case '\"': {
state = nsHtml5MetaScanner::ATTRIBUTE_VALUE_DOUBLE_QUOTED;
NS_HTML5_BREAK(beforeattributevalueloop);
}
case '\'': {
state = nsHtml5MetaScanner::ATTRIBUTE_VALUE_SINGLE_QUOTED;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
handleCharInAttributeValue(c);
state = nsHtml5MetaScanner::ATTRIBUTE_VALUE_UNQUOTED;
NS_HTML5_CONTINUE(stateloop);
}
}
}
beforeattributevalueloop_end:;
[[fallthrough]];
}
// ATTRIBUTE_VALUE_DOUBLE_QUOTED: feed characters to the value handler until
// the closing '"', then commit the value.
case ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '\"': {
handleAttributeValue();
state = nsHtml5MetaScanner::AFTER_ATTRIBUTE_VALUE_QUOTED;
NS_HTML5_BREAK(attributevaluedoublequotedloop);
}
default: {
handleCharInAttributeValue(c);
continue;
}
}
}
attributevaluedoublequotedloop_end:;
[[fallthrough]];
}
// AFTER_ATTRIBUTE_VALUE_QUOTED: decide what follows a quoted value:
// another attribute, a self-closing slash, or the end of the tag. Any
// other character is reconsumed as the start of the next attribute name.
case AFTER_ATTRIBUTE_VALUE_QUOTED: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_NAME;
NS_HTML5_CONTINUE(stateloop);
}
case '/': {
state = nsHtml5MetaScanner::SELF_CLOSING_START_TAG;
NS_HTML5_BREAK(afterattributevaluequotedloop);
}
case '>': {
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_NAME;
reconsume = true;
NS_HTML5_CONTINUE(stateloop);
}
}
}
afterattributevaluequotedloop_end:;
[[fallthrough]];
}
// SELF_CLOSING_START_TAG: after '/', a '>' ends the tag; anything else is
// reconsumed as the start of an attribute name.
case SELF_CLOSING_START_TAG: {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '>': {
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_NAME;
reconsume = true;
NS_HTML5_CONTINUE(stateloop);
}
}
}
// ATTRIBUTE_VALUE_UNQUOTED: feed characters to the value handler until
// whitespace or '>' terminates the value, then commit it.
case ATTRIBUTE_VALUE_UNQUOTED: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
handleAttributeValue();
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_NAME;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
handleAttributeValue();
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
handleCharInAttributeValue(c);
continue;
}
}
}
}
// AFTER_ATTRIBUTE_NAME: whitespace followed an attribute name. '=' starts
// its value; '/' and '>' commit the attribute without a value; any other
// character starts a new attribute name.
// NOTE(review): unlike BEFORE_ATTRIBUTE_NAME, the two name-start branches
// here leave httpEquivIndex and contentTypeIndex untouched and have no
// dedicated 'h' case. This file is generated, so confirm against
// MetaScanner.java whether that difference is intended.
case AFTER_ATTRIBUTE_NAME: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
continue;
}
case '/': {
handleAttributeValue();
state = nsHtml5MetaScanner::SELF_CLOSING_START_TAG;
NS_HTML5_CONTINUE(stateloop);
}
case '=': {
strBufLen = 0;
contentTypeIndex = 0;
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_VALUE;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
handleAttributeValue();
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
case 'c':
case 'C': {
contentIndex = 0;
charsetIndex = 0;
state = nsHtml5MetaScanner::ATTRIBUTE_NAME;
NS_HTML5_CONTINUE(stateloop);
}
default: {
contentIndex = INT32_MAX;
charsetIndex = INT32_MAX;
state = nsHtml5MetaScanner::ATTRIBUTE_NAME;
NS_HTML5_CONTINUE(stateloop);
}
}
}
}
// MARKUP_DECLARATION_OPEN: after "<!", a '-' may begin a comment; anything
// else is skipped up to the next '>'.
case MARKUP_DECLARATION_OPEN: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::MARKUP_DECLARATION_HYPHEN;
NS_HTML5_BREAK(markupdeclarationopenloop);
}
default: {
state = nsHtml5MetaScanner::SCAN_UNTIL_GT;
reconsume = true;
NS_HTML5_CONTINUE(stateloop);
}
}
}
markupdeclarationopenloop_end:;
[[fallthrough]];
}
// MARKUP_DECLARATION_HYPHEN: after "<!-", a second '-' confirms a comment.
case MARKUP_DECLARATION_HYPHEN: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::COMMENT_START;
NS_HTML5_BREAK(markupdeclarationhyphenloop);
}
default: {
state = nsHtml5MetaScanner::SCAN_UNTIL_GT;
reconsume = true;
NS_HTML5_CONTINUE(stateloop);
}
}
}
markupdeclarationhyphenloop_end:;
[[fallthrough]];
}
// COMMENT_START: first character after "<!--". A '>' here ends the
// comment immediately ("<!-->").
case COMMENT_START: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::COMMENT_START_DASH;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
state = nsHtml5MetaScanner::COMMENT;
NS_HTML5_BREAK(commentstartloop);
}
}
}
commentstartloop_end:;
[[fallthrough]];
}
// COMMENT: skip comment content until a '-' is seen.
case COMMENT: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::COMMENT_END_DASH;
NS_HTML5_BREAK(commentloop);
}
default: {
continue;
}
}
}
commentloop_end:;
[[fallthrough]];
}
// COMMENT_END_DASH: one '-' seen inside a comment; a second one moves to
// COMMENT_END, anything else goes back to COMMENT.
case COMMENT_END_DASH: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::COMMENT_END;
NS_HTML5_BREAK(commentenddashloop);
}
default: {
state = nsHtml5MetaScanner::COMMENT;
NS_HTML5_CONTINUE(stateloop);
}
}
}
commentenddashloop_end:;
[[fallthrough]];
}
// COMMENT_END: "--" seen; '>' closes the comment, further '-' characters
// stay in this state, anything else goes back to COMMENT.
case COMMENT_END: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
case '-': {
continue;
}
default: {
state = nsHtml5MetaScanner::COMMENT;
NS_HTML5_CONTINUE(stateloop);
}
}
}
}
// COMMENT_START_DASH: "<!---" seen. A '>' ends the comment ("<!--->"),
// another '-' moves to COMMENT_END.
case COMMENT_START_DASH: {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::COMMENT_END;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
state = nsHtml5MetaScanner::COMMENT;
NS_HTML5_CONTINUE(stateloop);
}
}
}
// ATTRIBUTE_VALUE_SINGLE_QUOTED: same as the double-quoted state, but
// terminated by '\''.
case ATTRIBUTE_VALUE_SINGLE_QUOTED: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '\'': {
handleAttributeValue();
state = nsHtml5MetaScanner::AFTER_ATTRIBUTE_VALUE_QUOTED;
NS_HTML5_CONTINUE(stateloop);
}
default: {
handleCharInAttributeValue(c);
continue;
}
}
}
}
// SCAN_UNTIL_GT: ignore everything up to and including the next '>'.
case SCAN_UNTIL_GT: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
continue;
}
}
}
}
}
}
stateloop_end:;
// Remember the state that was reached when the loop ended.
stateSave = state;
}
// Processes one character of an attribute value.
//
// Nothing happens unless the current tag is a meta tag (metaState == A).
// For a "content" or "charset" attribute the character is appended to the
// value buffer. For an "http-equiv" attribute the character is instead
// matched, ASCII-case-insensitively, against "content-type"; a mismatch
// parks contentTypeIndex at INT32_MAX.
void nsHtml5MetaScanner::handleCharInAttributeValue(int32_t c) {
  if (metaState != A) {
    return;
  }

  const bool collectingValue =
      contentIndex == CONTENT.length || charsetIndex == CHARSET.length;
  if (collectingValue) {
    addToBuffer(c);
    return;
  }

  if (httpEquivIndex != HTTP_EQUIV.length) {
    return;
  }

  // The bounds check must come first so CONTENT_TYPE is never indexed out of
  // range.
  const bool stillMatching =
      contentTypeIndex < CONTENT_TYPE.length &&
      toAsciiLowerCase(c) == CONTENT_TYPE[contentTypeIndex];
  contentTypeIndex = stillMatching ? contentTypeIndex + 1 : INT32_MAX;
}
void nsHtml5MetaScanner::addToBuffer(int32_t c) {
if (strBufLen == strBuf.length) {
jArray<char16_t, int32_t> newBuf = jArray<char16_t, int32_t>::newJArray(
nsHtml5Portability::checkedAdd(strBuf.length, (strBuf.length << 1)));
nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBuf.length);
strBuf = newBuf;
}
strBuf[strBufLen++] = (char16_t)c;
}
// Commits the attribute that has just ended, if the tag is a meta tag.
//
// At most one of the following happens, checked in this order:
//  - the name was "content" and no content value is held yet: the buffered
//    value becomes `content`;
//  - the name was "charset" and no charset value is held yet: the buffered
//    value becomes `charset`;
//  - the name was "http-equiv" and none has been recorded for this tag:
//    httpEquivState records whether the value matched "content-type".
// Only the first occurrence of each attribute within a tag is kept.
void nsHtml5MetaScanner::handleAttributeValue() {
  if (metaState == A) {
    if (contentIndex == CONTENT.length && !content) {
      content = nsHtml5Portability::newStringFromBuffer(strBuf, 0, strBufLen,
                                                        treeBuilder, false);
    } else if (charsetIndex == CHARSET.length && !charset) {
      charset = nsHtml5Portability::newStringFromBuffer(strBuf, 0, strBufLen,
                                                        treeBuilder, false);
    } else if (httpEquivIndex == HTTP_EQUIV.length &&
               httpEquivState == HTTP_EQUIV_NOT_SEEN) {
      if (contentTypeIndex == CONTENT_TYPE.length) {
        httpEquivState = HTTP_EQUIV_CONTENT_TYPE;
      } else {
        httpEquivState = HTTP_EQUIV_OTHER;
      }
    }
  }
}
// Called when a tag ends. Evaluates the attributes collected for the tag
// via handleTagInner() and then clears the per-tag state (captured charset
// and content strings, http-equiv status) so the next tag starts fresh.
//
// Returns the result of handleTagInner(); the state loop stops scanning
// when this is true.
bool nsHtml5MetaScanner::handleTag() {
  // Must run before the reset below, since it reads the captured values.
  const bool shouldStop = handleTagInner();

  httpEquivState = HTTP_EQUIV_NOT_SEEN;

  charset.Release();
  charset = nullptr;

  content.Release();
  content = nullptr;

  return shouldStop;
}
bool nsHtml5MetaScanner::handleTagInner() {
if (!!charset && tryCharset(charset)) {
return true;
}
if (!!content && httpEquivState == HTTP_EQUIV_CONTENT_TYPE) {
nsHtml5String extract =
nsHtml5TreeBuilder::extractCharsetFromContent(content, treeBuilder);
if (!extract) {
return false;
}
bool success = tryCharset(extract);
extract.Release();
return success;
}
return false;
}
// Static setup hook. Intentionally empty: the keyword tables of this class
// are defined as file-level static data above and need no runtime setup.
void nsHtml5MetaScanner::initializeStatics() {}
// Static teardown hook. Intentionally empty, matching initializeStatics().
void nsHtml5MetaScanner::releaseStatics() {}
#include "nsHtml5MetaScannerCppSupplement.h"

Просмотреть файл

@ -0,0 +1,172 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2015 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
* Please edit MetaScanner.java instead and regenerate.
*/
#ifndef nsHtml5MetaScanner_h
#define nsHtml5MetaScanner_h
#include "nsAtom.h"
#include "nsHtml5AtomTable.h"
#include "nsHtml5String.h"
#include "nsNameSpaceManager.h"
#include "nsIContent.h"
#include "nsTraceRefcnt.h"
#include "jArray.h"
#include "nsHtml5ArrayCopy.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsGkAtoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsHtml5Macros.h"
#include "nsIContentHandle.h"
#include "nsHtml5Portability.h"
#include "nsHtml5ContentCreatorFunction.h"
class nsHtml5StreamParser;
class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;
/**
 * Scanner that looks for an encoding label in tag attributes (generated from
 * MetaScanner.java). It keeps a byte source, state-machine bookkeeping and
 * the most recently collected `content` / `charset` attribute values, and
 * hands candidate labels to tryCharset().
 */
class nsHtml5MetaScanner {
private:
// Character arrays defined elsewhere; presumably the attribute names/values
// the scanner matches against (definitions not visible here -- confirm).
static staticJArray<char16_t, int32_t> CHARSET;
static staticJArray<char16_t, int32_t> CONTENT;
static staticJArray<char16_t, int32_t> HTTP_EQUIV;
static staticJArray<char16_t, int32_t> CONTENT_TYPE;
// Small enumeration, presumably the values of metaState tracking progress
// through a tag name -- NOTE(review): confirm against stateLoop().
static const int32_t NO = 0;
static const int32_t M = 1;
static const int32_t E = 2;
static const int32_t T = 3;
static const int32_t A = 4;
// State identifiers for the scanner's state machine (see stateLoop() and
// stateSave). Note that the value 12 is not assigned to any state.
static const int32_t DATA = 0;
static const int32_t TAG_OPEN = 1;
static const int32_t SCAN_UNTIL_GT = 2;
static const int32_t TAG_NAME = 3;
static const int32_t BEFORE_ATTRIBUTE_NAME = 4;
static const int32_t ATTRIBUTE_NAME = 5;
static const int32_t AFTER_ATTRIBUTE_NAME = 6;
static const int32_t BEFORE_ATTRIBUTE_VALUE = 7;
static const int32_t ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8;
static const int32_t ATTRIBUTE_VALUE_SINGLE_QUOTED = 9;
static const int32_t ATTRIBUTE_VALUE_UNQUOTED = 10;
static const int32_t AFTER_ATTRIBUTE_VALUE_QUOTED = 11;
static const int32_t MARKUP_DECLARATION_OPEN = 13;
static const int32_t MARKUP_DECLARATION_HYPHEN = 14;
static const int32_t COMMENT_START = 15;
static const int32_t COMMENT_START_DASH = 16;
static const int32_t COMMENT = 17;
static const int32_t COMMENT_END_DASH = 18;
static const int32_t COMMENT_END = 19;
static const int32_t SELF_CLOSING_START_TAG = 20;
// Values of httpEquivState. handleTag() resets the field to
// HTTP_EQUIV_NOT_SEEN; handleTagInner() only consults `content` when the
// field equals HTTP_EQUIV_CONTENT_TYPE.
static const int32_t HTTP_EQUIV_NOT_SEEN = 0;
static const int32_t HTTP_EQUIV_CONTENT_TYPE = 1;
static const int32_t HTTP_EQUIV_OTHER = 2;
protected:
// Byte source currently being scanned; set and cleared by sniff().
nsHtml5ByteReadable* readable;
private:
// Matching progress counters; presumably indices into the arrays above and
// the tag-name state -- confirm against stateLoop().
int32_t metaState;
int32_t contentIndex;
int32_t charsetIndex;
int32_t httpEquivIndex;
int32_t contentTypeIndex;
protected:
// State handed to stateLoop() by sniff().
int32_t stateSave;
private:
// Accumulation buffer and its used length (see addToBuffer()).
int32_t strBufLen;
autoJArray<char16_t, int32_t> strBuf;
// Attribute values collected for the current tag; released and nulled by
// handleTag().
nsHtml5String content;
nsHtml5String charset;
// One of the HTTP_EQUIV_* values above.
int32_t httpEquivState;
// Passed to nsHtml5TreeBuilder::extractCharsetFromContent().
nsHtml5TreeBuilder* treeBuilder;
public:
explicit nsHtml5MetaScanner(nsHtml5TreeBuilder* tb);
~nsHtml5MetaScanner();
protected:
// Runs the state machine starting from `state`.
void stateLoop(int32_t state);
private:
void handleCharInAttributeValue(int32_t c);
// Maps 'A'..'Z' to 'a'..'z' by adding 0x20; every other value is returned
// unchanged.
inline int32_t toAsciiLowerCase(int32_t c) {
if (c >= 'A' && c <= 'Z') {
return c + 0x20;
}
return c;
}
void addToBuffer(int32_t c);
void handleAttributeValue();
// Evaluates the collected attributes via handleTagInner(), then clears the
// per-tag state; returns handleTagInner()'s result.
bool handleTag();
// Tries `charset` first, then the charset extracted from `content` when
// http-equiv was content-type; returns true if tryCharset() accepted one.
bool handleTagInner();
protected:
// Resolves `encoding` as a label; returns false if it names no encoding.
bool tryCharset(nsHtml5String encoding);
public:
static void initializeStatics();
static void releaseStatics();
// Additional hand-written members are spliced in from the supplement header.
#include "nsHtml5MetaScannerHSupplement.h"
};
#endif

Просмотреть файл

@ -0,0 +1,39 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsISupportsImpl.h"
#include "mozilla/Encoding.h"
// Runs the scanner's state machine over `bytes`, starting from stateSave,
// and returns the current value of mEncoding.
const mozilla::Encoding* nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes) {
// Expose the byte source to read() for the duration of the scan only.
readable = bytes;
stateLoop(stateSave);
// Do not keep the caller's pointer beyond this call.
readable = nullptr;
// mEncoding is assigned by tryCharset() when a label is accepted; what it
// holds otherwise depends on initialization that is not visible here.
return mEncoding;
}
/**
 * Resolves an encoding label and, on success, records the resulting encoding
 * in mEncoding.
 *
 * UTF-16BE/LE labels are recorded as UTF-8 and x-user-defined is recorded as
 * windows-1252.
 *
 * NOTE: This code needs to stay in sync with
 * nsHtml5StreamParser::internalEncodingDeclaration. Unfortunately, the
 * trickery with member fields here leads to some copy-paste reuse. :-(
 *
 * @param charset the candidate label.
 * @return true if the label named an encoding, false otherwise.
 */
bool nsHtml5MetaScanner::tryCharset(nsHtml5String charset) {
  // Not Auto, because using it to hold nsStringBuffer*
  nsString wideLabel;
  charset.ToString(wideLabel);

  nsAutoCString narrowLabel;
  CopyUTF16toUTF8(wideLabel, narrowLabel);

  const mozilla::Encoding* resolved = Encoding::ForLabel(narrowLabel);
  if (!resolved) {
    return false;
  }

  if (resolved == UTF_16BE_ENCODING || resolved == UTF_16LE_ENCODING) {
    resolved = UTF_8_ENCODING;
  } else if (resolved == X_USER_DEFINED_ENCODING) {
    // WebKit/Blink hack for Indian and Armenian legacy sites
    resolved = WINDOWS_1252_ENCODING;
  }

  mEncoding = resolved;
  return true;
}

Просмотреть файл

@ -0,0 +1,11 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Hand-written members that are #included into the body of the
// nsHtml5MetaScanner class definition.
using Encoding = mozilla::Encoding;
private:
// Result slot: assigned by tryCharset() and returned by sniff().
const Encoding* mEncoding;
// Forwards to the current byte source; `readable` must be non-null, which
// sniff() arranges for the duration of stateLoop().
inline int32_t read() { return readable->read(); }
public:
// Scans `bytes` and returns the value of mEncoding afterwards.
const Encoding* sniff(nsHtml5ByteReadable* bytes);

Просмотреть файл

@ -98,8 +98,8 @@ void nsHtml5Parser::SetDocumentCharset(NotNull<const Encoding*> aEncoding,
bool aForceAutoDetection) {
MOZ_ASSERT(!mExecutor->HasStarted(), "Document charset set too late.");
MOZ_ASSERT(GetStreamParser(), "Setting charset on a script-only parser.");
GetStreamParser()->SetDocumentCharset(
aEncoding, (nsCharsetSource)aCharsetSource, aForceAutoDetection);
GetStreamParser()->SetDocumentCharset(aEncoding, aCharsetSource,
aForceAutoDetection);
mExecutor->SetDocumentCharsetAndSource(aEncoding, aCharsetSource);
}
@ -589,7 +589,7 @@ nsresult nsHtml5Parser::ParseUntilBlocked() {
!mExecutor->IsScriptExecuting()) {
mTreeBuilder->Flush();
mReturnToStreamParserPermitted = false;
GetStreamParser()->ContinueAfterScriptsOrEncodingCommitment(
GetStreamParser()->ContinueAfterScripts(
mTokenizer.get(), mTreeBuilder.get(), mLastWasCR);
}
} else {

Просмотреть файл

@ -49,6 +49,7 @@ class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;

Просмотреть файл

@ -13,7 +13,6 @@ nsHtml5SpeculativeLoad::nsHtml5SpeculativeLoad()
mIsAsync(false),
mIsDefer(false),
mIsLinkPreload(false),
mIsError(false),
mEncoding(nullptr) {
MOZ_COUNT_CTOR(nsHtml5SpeculativeLoad);
new (&mCharsetOrSrcset) nsString;
@ -23,8 +22,7 @@ nsHtml5SpeculativeLoad::~nsHtml5SpeculativeLoad() {
MOZ_COUNT_DTOR(nsHtml5SpeculativeLoad);
NS_ASSERTION(mOpCode != eSpeculativeLoadUninitialized,
"Uninitialized speculative load.");
if (!(mOpCode == eSpeculativeLoadSetDocumentCharset ||
mOpCode == eSpeculativeLoadMaybeComplainAboutCharset)) {
if (mOpCode != eSpeculativeLoadSetDocumentCharset) {
mCharsetOrSrcset.~nsString();
}
}
@ -102,9 +100,9 @@ void nsHtml5SpeculativeLoad::Perform(nsHtml5TreeOpExecutor* aExecutor) {
// TODO: remove this
break;
case eSpeculativeLoadSetDocumentCharset: {
MOZ_ASSERT(mTypeOrCharsetSourceOrDocumentModeOrMetaCSPOrSizesOrIntegrity
.Length() == 1,
"Unexpected charset source string");
NS_ASSERTION(mTypeOrCharsetSourceOrDocumentModeOrMetaCSPOrSizesOrIntegrity
.Length() == 1,
"Unexpected charset source string");
int32_t intSource =
(int32_t)mTypeOrCharsetSourceOrDocumentModeOrMetaCSPOrSizesOrIntegrity
.First();
@ -131,21 +129,6 @@ void nsHtml5SpeculativeLoad::Perform(nsHtml5TreeOpExecutor* aExecutor) {
aExecutor->PreloadFetch(mUrlOrSizes, mCrossOrigin, mMedia,
mReferrerPolicyOrIntegrity);
break;
case eSpeculativeLoadMaybeComplainAboutCharset: {
MOZ_ASSERT(mTypeOrCharsetSourceOrDocumentModeOrMetaCSPOrSizesOrIntegrity
.Length() == 2,
"Unexpected line number string");
uint32_t high =
(uint32_t)
mTypeOrCharsetSourceOrDocumentModeOrMetaCSPOrSizesOrIntegrity
.CharAt(0);
uint32_t low =
(uint32_t)
mTypeOrCharsetSourceOrDocumentModeOrMetaCSPOrSizesOrIntegrity
.CharAt(1);
uint32_t line = (high << 16) | low;
aExecutor->MaybeComplainAboutCharset(mMsgId, mIsError, (int32_t)line);
} break;
default:
MOZ_ASSERT_UNREACHABLE("Bogus speculative load.");
break;

Просмотреть файл

@ -32,8 +32,7 @@ enum eHtml5SpeculativeLoad {
eSpeculativeLoadSetDocumentMode,
eSpeculativeLoadPreconnect,
eSpeculativeLoadFont,
eSpeculativeLoadFetch,
eSpeculativeLoadMaybeComplainAboutCharset
eSpeculativeLoadFetch
};
class nsHtml5SpeculativeLoad {
@ -276,24 +275,6 @@ class nsHtml5SpeculativeLoad {
(char16_t)aCharsetSource);
}
/**
 * Initializes this (still uninitialized) speculative load as an
 * eSpeculativeLoadMaybeComplainAboutCharset operation.
 *
 * @param aMsgId      message identifier stored in mMsgId.
 * @param aError      stored in mIsError.
 * @param aLineNumber line number, packed into the shared string field.
 */
inline void InitMaybeComplainAboutCharset(const char* aMsgId, bool aError,
                                          int32_t aLineNumber) {
  MOZ_ASSERT(mOpCode == eSpeculativeLoadUninitialized,
             "Trying to reinitialize a speculative load!");
  mOpCode = eSpeculativeLoadMaybeComplainAboutCharset;
  // mMsgId shares a union with mCharsetOrSrcset, so the string member is
  // destroyed before the raw pointer is stored.
  mCharsetOrSrcset.~nsString();
  mMsgId = aMsgId;
  mIsError = aError;
  // Transport a 32-bit integer as two 16-bit code units of a string
  // in order to avoid adding an integer field to the object.
  // See https://bugzilla.mozilla.org/show_bug.cgi?id=1733043 for a better
  // eventual approach.
  const uint32_t lineBits = static_cast<uint32_t>(aLineNumber);
  const char16_t upperHalf = static_cast<char16_t>(lineBits >> 16);
  const char16_t lowerHalf = static_cast<char16_t>(lineBits & 0xFFFF);
  mTypeOrCharsetSourceOrDocumentModeOrMetaCSPOrSizesOrIntegrity.Assign(
      upperHalf);
  mTypeOrCharsetSourceOrDocumentModeOrMetaCSPOrSizesOrIntegrity.Append(
      lowerHalf);
}
/**
* Speculative document mode setting isn't really speculative. Once it
* happens, we are committed to it. However, this information needs to
@ -325,13 +306,9 @@ class nsHtml5SpeculativeLoad {
eHtml5SpeculativeLoad mOpCode;
/**
* Whether the referring element has async attribute.
* Whether the referring element has async and/or defer attributes.
*/
bool mIsAsync;
/**
* Whether the referring element has defer attribute.
*/
bool mIsDefer;
/**
@ -341,11 +318,6 @@ class nsHtml5SpeculativeLoad {
*/
bool mIsLinkPreload;
/**
* Whether the charset complaint is an error.
*/
bool mIsError;
/* If mOpCode is eSpeculativeLoadPictureSource, this is the value of the
* "sizes" attribute. If the attribute is not set, this will be a void
* string. Otherwise it empty or the value of the url.
@ -365,12 +337,10 @@ class nsHtml5SpeculativeLoad {
* or eSpeculativeLoadPictureSource, this is the value of the "srcset"
* attribute. If the attribute is not set, this will be a void string.
* Otherwise it's empty.
* For eSpeculativeLoadMaybeComplainAboutCharset mMsgId is used.
*/
union {
nsString mCharsetOrSrcset;
const Encoding* mEncoding;
const char* mMsgId;
};
/**
* If mOpCode is eSpeculativeLoadSetDocumentCharset, this is a

Просмотреть файл

@ -48,6 +48,7 @@
#include "nsHtml5ElementName.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5StateSnapshot.h"
#include "nsHtml5Portability.h"

Просмотреть файл

@ -51,6 +51,7 @@ class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;

Просмотреть файл

@ -47,6 +47,7 @@
#include "nsHtml5ElementName.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5Portability.h"

Просмотреть файл

@ -50,6 +50,7 @@ class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5UTF16Buffer;
class nsHtml5Portability;

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -6,8 +6,6 @@
#ifndef nsHtml5StreamParser_h
#define nsHtml5StreamParser_h
#include <tuple>
#include "MainThreadUtils.h"
#include "mozilla/AlreadyAddRefed.h"
#include "mozilla/Assertions.h"
@ -30,6 +28,7 @@
#include "nscore.h"
class nsCycleCollectionTraversalCallback;
class nsHtml5MetaScanner;
class nsHtml5OwningUTF16Buffer;
class nsHtml5Parser;
class nsHtml5Speculation;
@ -183,7 +182,7 @@ class nsHtml5StreamParser final : public nsISupports {
using NotNull = mozilla::NotNull<T>;
using Encoding = mozilla::Encoding;
const uint32_t UNCONDITIONAL_META_SCAN_BOUNDARY = 1024;
const uint32_t SNIFFING_BUFFER_SIZE = 1024;
const uint32_t READ_BUFFER_SIZE = 1024;
const uint32_t LOCAL_FILE_UTF_8_BUFFER_SIZE = 1024 * 1024 * 4; // 4 MB
@ -215,23 +214,12 @@ class nsHtml5StreamParser final : public nsISupports {
*/
bool internalEncodingDeclaration(nsHtml5String aEncoding);
bool TemplatePushedOrHeadPopped();
void RememberGt(int32_t aPos);
void PostEncodingCommitter();
// Not from an external interface
/**
* Pass a buffer to chardetng.
* Pass a buffer to the Japanese or Cyrillic detector as appropriate.
*/
void FeedDetector(mozilla::Span<const uint8_t> aBuffer);
/**
* Report EOF to chardetng.
*/
void DetectorEof();
void FeedDetector(mozilla::Span<const uint8_t> aBuffer, bool aLast);
/**
* Call this method once you've created a parser, and want to instruct it
@ -241,8 +229,7 @@ class nsHtml5StreamParser final : public nsISupports {
* @param aCharsetSource the source of the charset
*/
inline void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
nsCharsetSource aSource,
bool aForceAutoDetection) {
int32_t aSource, bool aForceAutoDetection) {
MOZ_ASSERT(mStreamState == STREAM_NOT_STARTED,
"SetDocumentCharset called too late.");
MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");
@ -260,14 +247,9 @@ class nsHtml5StreamParser final : public nsISupports {
* The owner parser must call this after script execution
* when no scripts are executing and the document.written
* buffer has been exhausted.
*
* If the first two arguments are nullptr, instead of
* continuing after scripts, this method commits to an
* internally-discovered encoding.
*/
void ContinueAfterScriptsOrEncodingCommitment(
nsHtml5Tokenizer* aTokenizer, nsHtml5TreeBuilder* aTreeBuilder,
bool aLastWasCR);
void ContinueAfterScripts(nsHtml5Tokenizer* aTokenizer,
nsHtml5TreeBuilder* aTreeBuilder, bool aLastWasCR);
/**
* Continues the stream parser if the charset switch failed.
@ -322,14 +304,6 @@ class nsHtml5StreamParser final : public nsISupports {
*/
void FlushTreeOpsAndDisarmTimer();
void SwitchDecoderIfAsciiSoFar(NotNull<const Encoding*> aEncoding);
size_t CountGts();
void DiscardMetaSpeculation();
bool ProcessLookingForMetaCharset(bool aEof);
void ParseAvailableData();
void DoStopRequest();
@ -358,39 +332,41 @@ class nsHtml5StreamParser final : public nsISupports {
*/
inline bool HasDecoder() { return !!mUnicodeDecoder; }
/**
* Returns 0 if 1) there aren't at least 2 buffers in mBufferedBytes
* or 2) there is no byte '>' in the second buffer.
* Otherwise, returns the length of the prefix of the second buffer
* that is long enough to contain the first byte '>' in the second
* buffer (including the '>' byte).
*/
size_t LengthOfLtContainingPrefixInSecondBuffer();
/**
* Push bytes from network when there is no Unicode decoder yet
*/
nsresult SniffStreamBytes(mozilla::Span<const uint8_t> aFromSegment,
bool aEof);
nsresult SniffStreamBytes(mozilla::Span<const uint8_t> aFromSegment);
/**
* Push bytes from network when there is a Unicode decoder already
*/
nsresult WriteStreamBytes(mozilla::Span<const uint8_t> aFromSegment);
/**
* Write the start of the stream to detector.
*/
void FinalizeSniffingWithDetector(mozilla::Span<const uint8_t> aFromSegment,
uint32_t aCountToSniffingLimit, bool aEof);
/**
* <meta charset> scan failed. Try chardet if applicable. After this, the
* parser will have some encoding even if a last resort fallback.
*
* @param aFromSegment The current network buffer
* @param aCountToSniffingLimit The number of unfilled slots in
* mSniffingBuffer
* @param aEof true iff called upon end of stream
*/
nsresult FinalizeSniffing(mozilla::Span<const uint8_t> aFromSegment,
uint32_t aCountToSniffingLimit, bool aEof);
/**
* Set up the Unicode decoder and write the sniffing buffer into it
* followed by the current network buffer.
*
* @param aPrefix the part of the stream that has already been seen
* prior to aFromSegment. In practice, these are the
* bytes that are baked into the state of the BOM
* and UTF-16 XML declaration-like sniffing state
* machine state.
* @param aFromSegment The current network buffer
*/
nsresult SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
mozilla::Span<const uint8_t> aPrefix,
mozilla::Span<const uint8_t> aFromSegment);
/**
@ -419,11 +395,22 @@ class nsHtml5StreamParser final : public nsISupports {
void ReDecodeLocalFile();
/**
* Potentially guess the encoding using mozilla::EncodingDetector.
* Returns the guessed encoding and a telemetry-appropriate source.
* Change a final autodetection source to the corresponding initial one.
*/
std::tuple<NotNull<const Encoding*>, nsCharsetSource> GuessEncoding(
bool aInitial);
int32_t MaybeRollBackSource(int32_t aSource);
/**
* Potentially guess the encoding using mozilla::EncodingDetector.
*/
void GuessEncoding(bool aEof, bool aInitial);
// Turns off both feeding the detector and querying it for a guess. If local
// file data is currently being decoded without tokenizing, this also commits
// the local file to the current encoding.
inline void DontGuessEncoding() {
mFeedChardet = false;
mGuessEncoding = false;
if (mDecodingLocalFileWithoutTokenizing) {
CommitLocalFileToEncoding();
}
}
/**
* Become confident or resolve an encoding name to its preferred form.
@ -433,7 +420,7 @@ class nsHtml5StreamParser final : public nsISupports {
* aEncoding and false if the parser became confident or if
* the encoding name did not specify a usable encoding
*/
const Encoding* PreferredForInternalEncodingDecl(const nsAString& aEncoding);
const Encoding* PreferredForInternalEncodingDecl(const nsACString& aEncoding);
/**
* Callback for mFlushTimer.
@ -487,37 +474,46 @@ class nsHtml5StreamParser final : public nsISupports {
*/
mozilla::UniquePtr<mozilla::Decoder> mUnicodeDecoder;
/**
* The buffer for sniffing the character encoding
*/
mozilla::UniquePtr<uint8_t[]> mSniffingBuffer;
/**
* The number of meaningful bytes in mSniffingBuffer
*/
uint32_t mSniffingLength;
/**
* BOM sniffing state
*/
eBomState mBomState;
/**
* <meta> prescan implementation
*/
mozilla::UniquePtr<nsHtml5MetaScanner> mMetaScanner;
// encoding-related stuff
/**
* The source (confidence) of the character encoding in use
*/
nsCharsetSource mCharsetSource;
nsCharsetSource mEncodingSwitchSource;
int32_t mCharsetSource;
/**
* The character encoding in use
*/
NotNull<const Encoding*> mEncoding;
const Encoding* mNeedsEncodingSwitchTo;
/**
* Whether the generic or Japanese detector should still be fed.
*/
bool mFeedChardet;
bool mSeenEligibleMetaCharset;
bool mChardetEof;
#ifdef DEBUG
bool mStartedFeedingDetector;
bool mStartedFeedingDevTools;
#endif
/**
* Whether the generic detector should be still queried for its guess.
*/
bool mGuessEncoding;
/**
* Whether reparse is forbidden
@ -534,44 +530,12 @@ class nsHtml5StreamParser final : public nsISupports {
*/
bool mChannelHadCharset;
/**
* We are in the process of looking for <meta charset>
*/
bool mLookingForMetaCharset;
/**
* Whether the byte stream started with ASCII <?
*/
bool mStartsWithLtQuestion;
/**
* If we are viewing XML source and are waiting for a '>' from the network.
*/
bool mLookingForXmlDeclarationForXmlViewSource;
/**
* Whether template has been pushed or head popped within the first 1024
* bytes.
*/
bool mTemplatePushedOrHeadPopped;
// Portable parser objects
/**
* The first buffer in the pending UTF-16 buffer queue
*/
RefPtr<nsHtml5OwningUTF16Buffer> mFirstBuffer;
/**
* Non-owning pointer to the most recent buffer that contains the most recent
* remembered greater-than sign. Used only while mLookingForMetaCharset is
* true. While mLookingForMetaCharset is true, mFirstBuffer is not changed and
* keeps the whole linked list of buffers alive. This pointer is non-owning to
* avoid frequent refcounting.
*/
nsHtml5OwningUTF16Buffer* mGtBuffer;
int32_t mGtPos;
/**
* The last buffer in the pending UTF-16 buffer queue
*/
@ -580,12 +544,6 @@ class nsHtml5StreamParser final : public nsISupports {
// a buffer of the size
// NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE
/**
* The first buffer of the document if looking for <meta charset> or
* nullptr afterwards.
*/
RefPtr<nsHtml5OwningUTF16Buffer> mFirstBufferOfMetaScan;
/**
* The tree operation executor
*/
@ -657,11 +615,12 @@ class nsHtml5StreamParser final : public nsISupports {
uint32_t mSpeculationFailureCount;
/**
* Number of bytes already buffered into mBufferedBytes.
* Number of bytes already buffered into mBufferedLocalFileData.
* Never counts above LOCAL_FILE_UTF_8_BUFFER_SIZE.
*/
uint32_t mNumBytesBuffered;
uint32_t mLocalFileBytesBuffered;
nsTArray<mozilla::Buffer<uint8_t>> mBufferedBytes;
nsTArray<mozilla::Buffer<uint8_t>> mBufferedLocalFileData;
/**
* True to terminate early; protected by mTerminatedMutex
@ -679,14 +638,6 @@ class nsHtml5StreamParser final : public nsISupports {
nsCOMPtr<nsIRunnable> mLoadFlusher;
/**
* This runnable is distinct from the regular flushers to
* signal the intent of encoding commitment without having to
* protect mPendingEncodingCommitment in the executer with a
* mutex.
*/
nsCOMPtr<nsIRunnable> mEncodingCommitter;
/**
* The generic detector.
*/
@ -714,11 +665,6 @@ class nsHtml5StreamParser final : public nsISupports {
*/
bool mDecodingLocalFileWithoutTokenizing;
/**
* Whether we are keeping the incoming bytes.
*/
bool mBufferingBytes;
/**
* Timer for flushing tree ops once in a while when not speculating.
*/

Просмотреть файл

@ -49,6 +49,7 @@
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5StateSnapshot.h"
@ -118,7 +119,6 @@ nsHtml5Tokenizer::nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler,
charRefBufMark(0),
value(0),
seenDigits(false),
suspendAfterCurrentNonTextToken(false),
cstart(0),
strBufLen(0),
charRefBuf(jArray<char16_t, int32_t>::newJArray(32)),
@ -278,11 +278,9 @@ void nsHtml5Tokenizer::appendStrBuf(char16_t* buffer, int32_t offset,
}
void nsHtml5Tokenizer::emitComment(int32_t provisionalHyphens, int32_t pos) {
RememberGt(pos);
tokenHandler->comment(strBuf, 0, strBufLen - provisionalHyphens);
clearStrBufAfterUse();
cstart = pos + 1;
suspendIfRequestedAfterCurrentNonTextToken();
}
void nsHtml5Tokenizer::flushChars(char16_t* buf, int32_t pos) {
@ -321,7 +319,6 @@ void nsHtml5Tokenizer::strBufToElementNameString() {
}
int32_t nsHtml5Tokenizer::emitCurrentTagToken(bool selfClosing, int32_t pos) {
RememberGt(pos);
cstart = pos + 1;
maybeErrSlashInEndTag(selfClosing);
stateSave = nsHtml5Tokenizer::DATA;
@ -351,7 +348,6 @@ int32_t nsHtml5Tokenizer::emitCurrentTagToken(bool selfClosing, int32_t pos) {
} else {
attributes->clear(0);
}
suspendIfRequestedAfterCurrentNonTextToken();
return stateSave;
}
@ -1274,9 +1270,6 @@ stateloop:
emitComment(0, pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '<': {
@ -1414,9 +1407,6 @@ stateloop:
emitComment(2, pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '-': {
@ -1477,9 +1467,6 @@ stateloop:
emitComment(3, pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '-': {
@ -1660,9 +1647,6 @@ stateloop:
emitComment(3, pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '-': {
@ -1751,9 +1735,6 @@ stateloop:
emitComment(1, pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '<': {
@ -1900,10 +1881,6 @@ stateloop:
cstart = pos + 1;
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
suspendIfRequestedAfterCurrentNonTextToken();
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
default: {
@ -2705,9 +2682,6 @@ stateloop:
emitComment(0, pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '-': {
@ -2750,9 +2724,6 @@ stateloop:
emitComment(0, pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '-': {
@ -3501,9 +3472,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '\0': {
@ -3559,9 +3527,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '\0': {
@ -3604,9 +3569,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case 'p':
@ -3731,9 +3693,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
default: {
@ -3792,9 +3751,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
default: {
@ -3833,9 +3789,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '\r': {
@ -3893,9 +3846,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '\"': {
@ -3956,9 +3906,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '\"': {
@ -4013,9 +3960,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '\r': {
@ -4061,9 +4005,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
default: {
@ -4093,9 +4034,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '\r': {
@ -4206,9 +4144,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
default: {
@ -4267,9 +4202,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
default: {
@ -4308,9 +4240,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '\r': {
@ -4356,9 +4285,6 @@ stateloop:
emitDoctypeToken(pos);
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
case '\r': {
@ -4411,10 +4337,6 @@ stateloop:
case '>': {
state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
reconsume, pos);
suspendIfRequestedAfterCurrentNonTextToken();
if (shouldSuspend) {
NS_HTML5_BREAK(stateloop);
}
NS_HTML5_CONTINUE(stateloop);
}
default: {
@ -4832,7 +4754,6 @@ eofloop_end:;
}
void nsHtml5Tokenizer::emitDoctypeToken(int32_t pos) {
RememberGt(pos);
cstart = pos + 1;
tokenHandler->doctype(doctypeName, publicIdentifier, systemIdentifier,
forceQuirks);
@ -4841,116 +4762,6 @@ void nsHtml5Tokenizer::emitDoctypeToken(int32_t pos) {
publicIdentifier = nullptr;
systemIdentifier.Release();
systemIdentifier = nullptr;
suspendIfRequestedAfterCurrentNonTextToken();
}
/**
 * Converts a pending "suspend after the current non-text token" request into
 * an actual suspension request. Does nothing when no request is pending.
 */
void nsHtml5Tokenizer::suspendIfRequestedAfterCurrentNonTextToken() {
  if (!suspendAfterCurrentNonTextToken) {
    return;
  }
  // Consume the one-shot request and ask the tokenizer loop to stop.
  suspendAfterCurrentNonTextToken = false;
  shouldSuspend = true;
}
// Sets suspendAfterCurrentNonTextToken depending on the saved tokenizer
// state (stateSave):
//  - for the first group of states (DATA, RCDATA, the script data states,
//    etc.) nothing is requested;
//  - for the tag, comment, doctype, CDATA and processing-instruction states
//    the flag is set;
//  - for the character-reference states the flag is set unless the saved
//    return state is DATA or RCDATA.
void nsHtml5Tokenizer::suspendAfterCurrentTokenIfNotInText() {
switch (stateSave) {
case DATA:
case RCDATA:
case SCRIPT_DATA:
case RAWTEXT:
case SCRIPT_DATA_ESCAPED:
case PLAINTEXT:
case NON_DATA_END_TAG_NAME:
case SCRIPT_DATA_LESS_THAN_SIGN:
case SCRIPT_DATA_ESCAPE_START:
case SCRIPT_DATA_ESCAPE_START_DASH:
case SCRIPT_DATA_ESCAPED_DASH:
case SCRIPT_DATA_ESCAPED_DASH_DASH:
case RAWTEXT_RCDATA_LESS_THAN_SIGN:
case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
case SCRIPT_DATA_DOUBLE_ESCAPE_START:
case SCRIPT_DATA_DOUBLE_ESCAPED:
case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
case SCRIPT_DATA_DOUBLE_ESCAPE_END: {
// No suspension is requested for these states.
return;
}
case TAG_NAME:
case BEFORE_ATTRIBUTE_NAME:
case ATTRIBUTE_NAME:
case AFTER_ATTRIBUTE_NAME:
case BEFORE_ATTRIBUTE_VALUE:
case AFTER_ATTRIBUTE_VALUE_QUOTED:
case BOGUS_COMMENT:
case MARKUP_DECLARATION_OPEN:
case DOCTYPE:
case BEFORE_DOCTYPE_NAME:
case DOCTYPE_NAME:
case AFTER_DOCTYPE_NAME:
case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
case BOGUS_DOCTYPE:
case COMMENT_START:
case COMMENT_START_DASH:
case COMMENT:
case COMMENT_END_DASH:
case COMMENT_END:
case COMMENT_END_BANG:
case TAG_OPEN:
case CLOSE_TAG_OPEN:
case MARKUP_DECLARATION_HYPHEN:
case MARKUP_DECLARATION_OCTYPE:
case DOCTYPE_UBLIC:
case DOCTYPE_YSTEM:
case AFTER_DOCTYPE_PUBLIC_KEYWORD:
case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
case AFTER_DOCTYPE_SYSTEM_KEYWORD:
case SELF_CLOSING_START_TAG:
case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
case ATTRIBUTE_VALUE_SINGLE_QUOTED:
case ATTRIBUTE_VALUE_UNQUOTED:
case BOGUS_COMMENT_HYPHEN:
case COMMENT_LESSTHAN:
case COMMENT_LESSTHAN_BANG:
case COMMENT_LESSTHAN_BANG_DASH:
case COMMENT_LESSTHAN_BANG_DASH_DASH:
case CDATA_START:
case CDATA_SECTION:
case CDATA_RSQB:
case CDATA_RSQB_RSQB:
case PROCESSING_INSTRUCTION:
case PROCESSING_INSTRUCTION_QUESTION_MARK: {
// Fall out of the switch so that the flag below gets set.
break;
}
case CONSUME_CHARACTER_REFERENCE:
case CONSUME_NCR:
case CHARACTER_REFERENCE_TAIL:
case HEX_NCR_LOOP:
case DECIMAL_NRC_LOOP:
case HANDLE_NCR_VALUE:
case HANDLE_NCR_VALUE_RECONSUME:
case CHARACTER_REFERENCE_HILO_LOOKUP: {
// For character references the decision depends on the saved return
// state: DATA and RCDATA are treated like the first group above.
if (returnStateSave == DATA || returnStateSave == RCDATA) {
return;
}
break;
}
default: {
// Every tokenizer state is expected to be listed explicitly above.
MOZ_ASSERT(false, "Incomplete switch");
return;
}
}
suspendAfterCurrentNonTextToken = true;
}
// Reports whether a suspension request set by
// suspendAfterCurrentTokenIfNotInText() is still pending.
bool nsHtml5Tokenizer::suspensionAfterCurrentNonTextTokenPending() {
return suspendAfterCurrentNonTextToken;
}
bool nsHtml5Tokenizer::internalEncodingDeclaration(
@ -5022,7 +4833,6 @@ void nsHtml5Tokenizer::resetToDataState() {
charRefBufMark = 0;
value = 0;
seenDigits = false;
suspendAfterCurrentNonTextToken = false;
endTag = false;
shouldSuspend = false;
initDoctypeFields();
@ -5063,7 +4873,6 @@ void nsHtml5Tokenizer::loadState(nsHtml5Tokenizer* other) {
seenDigits = other->seenDigits;
endTag = other->endTag;
shouldSuspend = false;
suspendAfterCurrentNonTextToken = false;
doctypeName = other->doctypeName;
systemIdentifier.Release();
if (!other->systemIdentifier) {

Просмотреть файл

@ -52,6 +52,7 @@ class nsHtml5StreamParser;
class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;
@ -269,7 +270,6 @@ class nsHtml5Tokenizer {
private:
bool seenDigits;
bool suspendAfterCurrentNonTextToken;
protected:
int32_t cstart;
@ -452,9 +452,6 @@ class nsHtml5Tokenizer {
private:
void emitDoctypeToken(int32_t pos);
void suspendIfRequestedAfterCurrentNonTextToken();
void suspendAfterCurrentTokenIfNotInText();
bool suspensionAfterCurrentNonTextTokenPending();
protected:
inline char16_t checkChar(char16_t* buf, int32_t pos) { return buf[pos]; }

Просмотреть файл

@ -61,19 +61,6 @@ bool nsHtml5Tokenizer::EnsureBufferSpace(int32_t aLength) {
return true;
}
// Forwards the notification to encodingDeclarationHandler and returns the
// handler's result. Returns false when no handler is set.
bool nsHtml5Tokenizer::TemplatePushedOrHeadPopped() {
if (encodingDeclarationHandler) {
return encodingDeclarationHandler->TemplatePushedOrHeadPopped();
}
return false;
}
// Passes aPos on to encodingDeclarationHandler->RememberGt(); does nothing
// when no handler is set.
void nsHtml5Tokenizer::RememberGt(int32_t aPos) {
if (encodingDeclarationHandler) {
return encodingDeclarationHandler->RememberGt(aPos);
}
}
// Sets stateSave to the PLAINTEXT tokenizer state.
void nsHtml5Tokenizer::StartPlainText() {
stateSave = nsHtml5Tokenizer::PLAINTEXT;
}
@ -88,18 +75,8 @@ void nsHtml5Tokenizer::StartViewSource(const nsAutoString& aTitle) {
mViewSource->Start(aTitle);
}
// Delegates to mViewSource->StartCharacters().
// NOTE(review): mViewSource is dereferenced without a null check here;
// presumably callers only use this when a highlighter exists -- confirm.
void nsHtml5Tokenizer::StartViewSourceCharacters() {
mViewSource->StartCharacters();
}
// Delegates to mViewSource->End().
void nsHtml5Tokenizer::EndViewSource() { mViewSource->End(); }
// Hands aOpSink to the view-source highlighter via mViewSource->SetOpSink().
void nsHtml5Tokenizer::SetViewSourceOpSink(nsAHtml5TreeOpSink* aOpSink) {
mViewSource->SetOpSink(aOpSink);
}
// Delegates to mViewSource->Rewind().
void nsHtml5Tokenizer::RewindViewSource() { mViewSource->Rewind(); }
// Empty body: this error hook performs no action here.
void nsHtml5Tokenizer::errWarnLtSlashInRcdata() {}
// The null checks below annotated MOZ_LIKELY are not actually necessary.

Просмотреть файл

@ -14,16 +14,6 @@ inline nsHtml5HtmlAttributes* GetAttributes() { return attributes; }
*/
bool EnsureBufferSpace(int32_t aLength);
bool TemplatePushedOrHeadPopped();
void RememberGt(int32_t aPos);
// Calls suspendAfterCurrentTokenIfNotInText().
void AtKilobyteBoundary() { suspendAfterCurrentTokenIfNotInText(); }
// Returns the result of suspensionAfterCurrentNonTextTokenPending().
bool IsInTokenStartedAtKilobyteBoundary() {
return suspensionAfterCurrentNonTextTokenPending();
}
mozilla::UniquePtr<nsHtml5Highlighter> mViewSource;
/**
@ -38,14 +28,8 @@ bool FlushViewSource();
void StartViewSource(const nsAutoString& aTitle);
void StartViewSourceCharacters();
void EndViewSource();
void RewindViewSource();
void SetViewSourceOpSink(nsAHtml5TreeOpSink* aOpSink);
void errGarbageAfterLtSlash();
void errLtSlashGt();

Просмотреть файл

@ -59,6 +59,7 @@
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5StateSnapshot.h"

Просмотреть файл

@ -62,6 +62,7 @@ class nsHtml5StreamParser;
class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5Tokenizer;
class nsHtml5MetaScanner;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;

Просмотреть файл

@ -992,12 +992,6 @@ void nsHtml5TreeBuilder::elementPushed(int32_t aNamespace, nsAtom* aName,
//
// See comments in nsHtml5SpeculativeLoad.h about <picture> preloading
mSpeculativeLoadQueue.AppendElement()->InitOpenPicture();
return;
}
if (aName == nsGkAtoms::_template) {
if (tokenizer->TemplatePushedOrHeadPopped()) {
requestSuspension();
}
}
}
@ -1065,11 +1059,6 @@ void nsHtml5TreeBuilder::elementPopped(int32_t aNamespace, nsAtom* aName,
}
opDoneAddingChildren operation(aElement);
treeOp->Init(mozilla::AsVariant(operation));
if (aNamespace == kNameSpaceID_XHTML && aName == nsGkAtoms::head) {
if (tokenizer->TemplatePushedOrHeadPopped()) {
requestSuspension();
}
}
return;
}
if (aName == nsGkAtoms::style ||
@ -1123,7 +1112,6 @@ void nsHtml5TreeBuilder::elementPopped(int32_t aNamespace, nsAtom* aName,
//
// See comments in nsHtml5SpeculativeLoad.h about <picture> preloading
mSpeculativeLoadQueue.AppendElement()->InitEndPicture();
return;
}
}
@ -1297,15 +1285,9 @@ void nsHtml5TreeBuilder::MaybeComplainAboutCharset(const char* aMsgId,
MOZ_ASSERT_UNREACHABLE("Must never complain about charset with builder.");
return;
}
if (mSpeculativeLoadStage) {
mSpeculativeLoadQueue.AppendElement()->InitMaybeComplainAboutCharset(
aMsgId, aError, aLineNumber);
} else {
opMaybeComplainAboutCharset opeartion(const_cast<char*>(aMsgId), aError,
aLineNumber);
mOpQueue.AppendElement()->Init(mozilla::AsVariant(opeartion));
}
opMaybeComplainAboutCharset opeartion(const_cast<char*>(aMsgId), aError,
aLineNumber);
mOpQueue.AppendElement()->Init(mozilla::AsVariant(opeartion));
}
void nsHtml5TreeBuilder::TryToEnableEncodingMenu() {

Просмотреть файл

@ -118,7 +118,6 @@ StaticRefPtr<IdleTaskRunner> gBackgroundFlushRunner;
nsHtml5TreeOpExecutor::nsHtml5TreeOpExecutor()
: nsHtml5DocumentBuilder(false),
mSuppressEOF(false),
mPendingEncodingCommitment(false),
mReadingFromStage(false),
mStreamParser(nullptr),
mPreloadedURLs(23), // Mean # of preloadable resources per page on dmoz
@ -433,7 +432,7 @@ nsresult nsHtml5TreeOpExecutor::MarkAsBroken(nsresult aReason) {
static bool BackgroundFlushCallback(TimeStamp /*aDeadline*/) {
RefPtr<nsHtml5TreeOpExecutor> ex = gBackgroundFlushList->popFirst();
if (ex) {
ex->RunFlushLoopOrCommitToInternalEncoding();
ex->RunFlushLoop();
}
if (gBackgroundFlushList && gBackgroundFlushList->isEmpty()) {
delete gBackgroundFlushList;
@ -446,23 +445,11 @@ static bool BackgroundFlushCallback(TimeStamp /*aDeadline*/) {
}
void nsHtml5TreeOpExecutor::ContinueInterruptedParsingAsync() {
if (mDocument && !mDocument->IsInBackgroundWindow()) {
if (mPendingEncodingCommitment) {
// Unlike the general executor flusher, the encoding committer
// calls ContinueAfterScriptsOrEncodingCommitment() with the
// arguments indicating encoding commitment rather than script
// once done. Also, in order to get the correct encoding set
// on the document object for speculative load purposes, the
// encoding committer doesn't check for throttling.
if (mStreamParser) {
mStreamParser->PostEncodingCommitter();
}
} else {
nsCOMPtr<nsIRunnable> flusher = new nsHtml5ExecutorReflusher(this);
if (NS_FAILED(
mDocument->Dispatch(TaskCategory::Network, flusher.forget()))) {
NS_WARNING("failed to dispatch executor flush event");
}
if (!mDocument || !mDocument->IsInBackgroundWindow()) {
nsCOMPtr<nsIRunnable> flusher = new nsHtml5ExecutorReflusher(this);
if (NS_FAILED(
mDocument->Dispatch(TaskCategory::Network, flusher.forget()))) {
NS_WARNING("failed to dispatch executor flush event");
}
} else {
if (!gBackgroundFlushList) {
@ -813,40 +800,6 @@ nsresult nsHtml5TreeOpExecutor::FlushDocumentWrite() {
return rv;
}
// Dispatches on mPendingEncodingCommitment: when the flag is set, calls
// CommitToInternalEncoding(); otherwise runs the ordinary RunFlushLoop().
void nsHtml5TreeOpExecutor::RunFlushLoopOrCommitToInternalEncoding() {
if (mPendingEncodingCommitment) {
CommitToInternalEncoding();
} else {
RunFlushLoop();
}
}
// Flushes pending tree ops and, once the op queue is empty and the parser is
// enabled, tells the stream parser to continue via
// ContinueAfterScriptsOrEncodingCommitment(nullptr, nullptr, false).
//
// If the flush cannot complete in this call, mPendingEncodingCommitment is
// set back to true and the function returns early.
void nsHtml5TreeOpExecutor::CommitToInternalEncoding() {
if (MOZ_UNLIKELY(!mParser || !mStreamParser)) {
// An extension terminated the parser from a HTTP observer.
ClearOpQueue(); // clear in order to be able to assert in destructor
return;
}
// Clear the flag before flushing; the early-return paths below set it again.
mPendingEncodingCommitment = false;
RunFlushLoop();
// The above loop relates to plain text and View Source only. As such,
// it never runs content scripts. Unfortunately, the loop may be interrupted
// by timer and by extension-injected scripts. In that case, we repost the
// runnable and return early. :-(
if (!mParser->IsParserEnabled()) {
// Parser is not enabled after the flush: mark the commitment as pending.
mPendingEncodingCommitment = true;
return;
}
if (!mOpQueue.IsEmpty()) {
// Ops remain queued: ask the stream parser to post the committer again.
mStreamParser->PostEncodingCommitter();
mPendingEncodingCommitment = true;
return;
}
mStreamParser->ContinueAfterScriptsOrEncodingCommitment(nullptr, nullptr,
false);
// Flush again after the stream parser has been told to continue.
RunFlushLoop();
}
// copied from HTML content sink
bool nsHtml5TreeOpExecutor::IsScriptEnabled() {
// Note that if we have no document or no docshell or no global or whatnot we
@ -973,39 +926,57 @@ void nsHtml5TreeOpExecutor::NeedsCharsetSwitchTo(
}
// if the charset switch was accepted, mDocShell has called Terminate() on the
// parser by now
if (!mParser) {
// success
if (aSource == kCharsetFromMetaTag) {
MaybeComplainAboutCharset("EncLateMetaReload", false, aLineNumber);
}
return;
}
if (aSource == kCharsetFromMetaTag) {
MaybeComplainAboutCharset("EncLateMetaTooLate", true, aLineNumber);
}
GetParser()->ContinueAfterFailedCharsetSwitch();
}
void nsHtml5TreeOpExecutor::MaybeComplainAboutCharset(const char* aMsgId,
bool aError,
uint32_t aLineNumber) {
// Encoding errors don't count towards already complaining
if (!(!strcmp(aMsgId, "EncError") || !strcmp(aMsgId, "EncErrorFrame") ||
!strcmp(aMsgId, "EncErrorFramePlain"))) {
if (mAlreadyComplainedAboutCharset) {
if (mAlreadyComplainedAboutCharset) {
return;
}
// The EncNoDeclaration case for advertising iframes is so common that it
// would result in way too many errors. The iframe case doesn't matter
// when the ad is an image or a Flash animation anyway. When the ad is
// textual, a misrendered ad probably isn't a huge loss for users.
// Let's suppress the message in this case.
// This means that errors about other different-origin iframes in mashups
// are lost as well, but generally, the site author isn't in control of
// the embedded different-origin pages anyway and can't fix problems even
// if alerted about them.
if (!strcmp(aMsgId, "EncNoDeclaration") && mDocShell) {
dom::BrowsingContext* const bc = mDocShell->GetBrowsingContext();
if (bc && bc->GetParent()) {
return;
}
mAlreadyComplainedAboutCharset = true;
}
mAlreadyComplainedAboutCharset = true;
nsContentUtils::ReportToConsole(
aError ? nsIScriptError::errorFlag : nsIScriptError::warningFlag,
"HTML parser"_ns, mDocument, nsContentUtils::eHTMLPARSER_PROPERTIES,
aMsgId, nsTArray<nsString>(), nullptr, u""_ns, aLineNumber);
}
void nsHtml5TreeOpExecutor::ComplainAboutBogusProtocolCharset(
Document* aDoc, bool aUnrecognized) {
void nsHtml5TreeOpExecutor::ComplainAboutBogusProtocolCharset(Document* aDoc) {
NS_ASSERTION(!mAlreadyComplainedAboutCharset,
"How come we already managed to complain?");
mAlreadyComplainedAboutCharset = true;
nsContentUtils::ReportToConsole(
nsIScriptError::errorFlag, "HTML parser"_ns, aDoc,
nsContentUtils::eHTMLPARSER_PROPERTIES,
aUnrecognized ? "EncProtocolUnsupported" : "EncProtocolReplacement");
nsContentUtils::ReportToConsole(nsIScriptError::errorFlag, "HTML parser"_ns,
aDoc, nsContentUtils::eHTMLPARSER_PROPERTIES,
"EncProtocolUnsupported");
}
void nsHtml5TreeOpExecutor::MaybeComplainAboutDeepTree(uint32_t aLineNumber) {

Просмотреть файл

@ -60,21 +60,6 @@ class nsHtml5TreeOpExecutor final
*/
bool mSuppressEOF;
/**
* Set to true if CommitToInternalEncoding() was unable to flush all
* pending operations. In that case, this flag signals to
* ContinueInterruptedParsingAsync() that another attempt to call
* CommitToInternalEncoding() (from the event loop) is needed.
*
* This may happen in particular when extensions inject scripts
* to the document, and the script injection prevents
* CommitToInternalEncoding() from completing in one attempt.
*
* This can also happen as a result of the flush being slow enough
* that the flush is interrupted based on time.
*/
bool mPendingEncodingCommitment;
bool mReadingFromStage;
nsTArray<nsHtml5TreeOperation> mOpQueue;
nsHtml5StreamParser* mStreamParser;
@ -194,12 +179,8 @@ class nsHtml5TreeOpExecutor final
void RunFlushLoop();
void RunFlushLoopOrCommitToInternalEncoding();
nsresult FlushDocumentWrite();
void CommitToInternalEncoding();
void MaybeSuspend();
void Start();
@ -210,8 +191,7 @@ class nsHtml5TreeOpExecutor final
void MaybeComplainAboutCharset(const char* aMsgId, bool aError,
uint32_t aLineNumber);
void ComplainAboutBogusProtocolCharset(mozilla::dom::Document* aDoc,
bool aUnrecognized);
void ComplainAboutBogusProtocolCharset(mozilla::dom::Document*);
void MaybeComplainAboutDeepTree(uint32_t aLineNumber);

Просмотреть файл

@ -47,6 +47,7 @@
#include "nsHtml5ElementName.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5StateSnapshot.h"
#include "nsHtml5Portability.h"

Просмотреть файл

@ -50,6 +50,7 @@ class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5StateSnapshot;
class nsHtml5Portability;

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Encoding error</title>
</head>
<body>
<p>ä</p>
</body>
</html>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Encoding error</title>
</head>
<body>
<p>ä</p>
</body>
</html>

Просмотреть файл

@ -1 +0,0 @@
<!DOCTYPE html>

Просмотреть файл

@ -1,2 +0,0 @@
HTTP 200 OK
Content-Type: text/html; charset=iso-2022-kr

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Meta after head</title>
</head>
<body>
<meta charset="windows-1251">
<p>Meta after head</p>
</body>
</html>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="iso-2022-kr">
<title>Replacement encoding</title>
</head>
<body>
<p>Replacement encoding</p>
</body>
</html>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Speculation fäilure</title>
<meta charset="windows-1252">
</head>
<body>
<p>Speculation fails</p>
</body>
</html>

Просмотреть файл

@ -1,10 +0,0 @@
<?xml version="1.0" encoding="windows-1251"?>
<!DOCTYPE html>
<html>
<head>
<title>Has only XML decl</title>
</head>
<body>
<p>Only XML decl</p>
</body>
</html>

Просмотреть файл

@ -1,10 +0,0 @@
<?xml version="1.0" encoding="windows-1252"?>
<!DOCTYPE html>
<html>
<head>
<title>Speculation fäilure</title>
</head>
<body>
<p>Speculation fails</p>
</body>
</html>

Просмотреть файл

@ -26,15 +26,6 @@ support-files =
file_bug688580.js
file_bug672453_not_declared.html
file_bug672453_meta_userdefined.html
file_bug672453_xml_decl.html
file_bug672453_meta_after_head.html
file_bug672453_meta_replacement.html
file_bug672453_http_replacement.html
file_bug672453_http_replacement.html^headers^
file_bug672453_enc_error.html
file_bug672453_enc_error_inherited.html
file_bug672453_meta_speculation_fail.html
file_bug672453_xml_speculation_fail.html
file_bug716579-16.html
file_bug716579-16.html^headers^
file_bug716579-16.xhtml

Просмотреть файл

@ -19,48 +19,27 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=672453
var tests = [
"file_bug672453_not_declared.html",
"file_bug672453_xml_decl.html",
"file_bug672453_late_meta.html",
"file_bug672453_meta_restart.html",
"file_bug672453_meta_after_head.html",
"file_bug672453_meta_unsupported.html",
"file_bug672453_http_unsupported.html",
"file_bug672453_meta_utf16.html",
"file_bug672453_meta_non_superset.html",
"file_bug672453_meta_userdefined.html",
"file_bug672453_meta_replacement.html",
"file_bug672453_http_replacement.html",
"file_bug672453_enc_error.html",
"file_bug672453_enc_error_inherited.html",
"file_bug672453_meta_speculation_fail.html",
"file_bug672453_xml_speculation_fail.html",
];
// The general idea here is that encoding substitutions or failures to declare the encoding
// (except when inherited or, not tested here, all-ASCII) are errors and inefficiencies
// or risks about things failing under further editing (i.e. meta after head but within
// the extended scan zone) are warnings.
var expectedErrors = [
{ errorMessage: "The character encoding of a framed document was not declared. The document may appear different if viewed without the document framing it.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_not_declared.html",
lineNumber: 0,
isWarning: true },
{ errorMessage: "The character encoding of an HTML document was declared using the XML declaration syntax. This is non-conforming, and declaring the encoding using a meta tag at the start of the head part is more efficient.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_xml_decl.html",
lineNumber: 1,
isWarning: true },
{ errorMessage: "A meta tag attempting to declare the character encoding declaration was found too late, and the encoding of the parent document was used instead. The meta tag needs to be moved to the start of the head part of the document.",
{ errorMessage: "The character encoding declaration of the framed HTML document was not found when prescanning the first 1024 bytes of the file. When viewed without the document framing it, the page will reload automatically. The encoding declaration needs to be moved to be within the first 1024 bytes of the file.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_late_meta.html",
lineNumber: 1028,
isWarning: false },
{ errorMessage: "A meta tag attempting to declare the character encoding declaration was found too late, and the encoding of the parent document was used instead. The meta tag needs to be moved to the start of the head part of the document.",
isWarning: true },
{ errorMessage: "The page was reloaded, because the character encoding declaration of the HTML document was not found when prescanning the first 1024 bytes of the file. The encoding declaration needs to be moved to be within the first 1024 bytes of the file.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_meta_restart.html",
lineNumber: 1028,
isWarning: false },
{ errorMessage: "The meta tag declaring the character encoding of the document should be moved to start of the head part of the document.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_meta_after_head.html",
lineNumber: 7,
isWarning: true },
{ errorMessage: "An unsupported character encoding was declared for the HTML document using a meta tag. The declaration was ignored.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_meta_unsupported.html",
@ -82,30 +61,6 @@ var expectedErrors = [
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_meta_userdefined.html",
lineNumber: 1,
isWarning: false },
{ errorMessage: "A meta tag was used to declare an encoding that is a cross-site scripting hazard. The replacement encoding was used instead.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_meta_replacement.html",
lineNumber: 0,
isWarning: false },
{ errorMessage: "An encoding that is a cross-site scripting hazard was declared on the transfer protocol level. The replacement encoding was used instead.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_http_replacement.html",
lineNumber: 0,
isWarning: false },
{ errorMessage: "The byte stream was erroneous according to the character encoding that was declared. The character encoding declaration may be incorrect.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_enc_error.html",
lineNumber: 0,
isWarning: false },
{ errorMessage: "The byte stream was erroneous according to the character encoding that was inherited from the parent document. The character encoding needs to be declared in the Content-Type HTTP header, using a meta tag, or using a byte order mark.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_enc_error_inherited.html",
lineNumber: 0,
isWarning: false },
{ errorMessage: "The start of the document was reparsed, because there were non-ASCII characters before the meta tag that declared the encoding. The meta should be the first child of head without non-ASCII comments before.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_meta_speculation_fail.html",
lineNumber: 5,
isWarning: true },
{ errorMessage: "The start of the document was reparsed, because there were non-ASCII characters in the part of the document that was unsuccessfully searched for a meta tag before falling back to the XML declaration syntax. A meta tag at the start of the head part should be used instead of the XML declaration syntax.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_xml_speculation_fail.html",
lineNumber: 11,
isWarning: true },
];
SimpleTest.waitForExplicitFinish();

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>In <code>head</code>, after first kilobyte.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,955 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="match" href="references/after-1kb-ref.html">
<meta charset="windows-1251">
</head>
<body>
<p>In <code>head</code>, after first kilobyte.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>After <code>bogus</code>, before <code>head</code> end tag, after first kilobyte.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,933 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/after-bogus-after-1kb-ref.html">
<bogus><meta charset="windows-1251">
</head>
<body>
<p>After <code>bogus</code>, before <code>head</code> end tag, after first kilobyte.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Meta after <code>bogus</code>.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="match" href="references/after-bogus-ref.html">
<bogus><meta charset="windows-1251">
</head>
<body>
<p>Meta after <code>bogus</code>.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>After <code>head</code>, before <code>body</code>, after first kilobyte, with a CRLF in the first kilobyte.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,927 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/after-head-after-1kb-crlf-ref.html">
</head>
<meta charset="windows-1251">
<body>
<p>After <code>head</code>, before <code>body</code>, after first kilobyte, with a CRLF in the first kilobyte.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>After <code>head</code>, before <code>body</code>, after first kilobyte.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,933 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/after-head-after-1kb-ref.html">
</head>
<meta charset="windows-1251">
<body>
<p>After <code>head</code>, before <code>body</code>, after first kilobyte.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>After <code>head</code>, before <code>body</code>, within first kilobyte, with a CRLF in the first kilobyte.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,932 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="match" href="references/after-head-in-1kb-crlf-ref.html">
</head>
<meta charset="windows-1251">
<body>
<p>After <code>head</code>, before <code>body</code>, within first kilobyte, with a CRLF in the first kilobyte.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>After <code>head</code>, before <code>body</code>, within first kilobyte.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,938 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="match" href="references/after-head-in-1kb-ref.html">
</head>
<meta charset="windows-1251">
<body>
<p>After <code>head</code>, before <code>body</code>, within first kilobyte.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Normal meta.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="match" href="references/baseline-ref.html">
<meta charset="windows-1251">
</head>
<body>
<p>Normal meta.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Meta from <code>document.write</code> (with concatenation in the middle of <code>charset</code> to require execution for effect).</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/document-write-ref.html">
<script>document.write('<meta char' + 'set="windows-1251">');</script>
</head>
<body>
<p>Meta from <code>document.write</code> (with concatenation in the middle of <code>charset</code> to require execution for effect).</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Meta inside comment.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/in-comment-ref.html">
<!--<meta charset="windows-1251">-->
</head>
<body>
<p>Meta inside comment.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Meta in <code>noscript</code> after <code>template</code> (which is also inside the <code>noscript</code>) after 1kb of padding following the template.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,894 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/in-noscript-after-template-after-1kb-ref.html">
<noscript><template></template>
<meta charset="windows-1251"></noscript>
</head>
<body>
<p>Meta in <code>noscript</code> after <code>template</code> (which is also inside the <code>noscript</code>) after 1kb of padding following the template.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Meta in <code>noscript</code> after <code>template</code> (which is also inside the <code>noscript</code>).</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/in-noscript-after-template-ref.html">
<noscript><template></template><meta charset="windows-1251"></noscript>
</head>
<body>
<p>Meta in <code>noscript</code> after <code>template</code> (which is also inside the <code>noscript</code>).</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Meta with NCR in the encoding label in <code>noscript</code>.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/in-noscript-ncr-ref.html">
<noscript><meta charset="&#119;indows-1251"></noscript>
</head>
<body>
<p>Meta with NCR in the encoding label in <code>noscript</code>.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Meta in <code>noscript</code>.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/in-noscript-ref.html">
<noscript><meta charset="windows-1251"></noscript>
</head>
<body>
<p>Meta in <code>noscript</code>.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Meta in <code>object</code>.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="match" href="references/in-object-ref.html">
<object><meta charset="windows-1251"></object>
</head>
<body>
<p>Meta in <code>object</code>.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Meta in <code>script</code>.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/in-script-ref.html">
<script><meta charset="windows-1251"></script>
</head>
<body>
<p>Meta in <code>script</code>.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,9 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<p>Meta in <code>style</code>.</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/in-style-ref.html">
<style><meta charset="windows-1251"></style>
</head>
<body>
<p>Meta in <code>style</code>.</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<svg></svg>
<p>In SVG in CDATA after greater-than sign in the CDATA (after head).</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/in-svg-in-cdata-after-gt-ref.html">
</head>
<body>
<svg><![CDATA[><meta charset="windows-1251">]]></svg>
<p>In SVG in CDATA after greater-than sign in the CDATA (after head).</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<svg></svg>
<p>In SVG in CDATA (after head).</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="mismatch" href="references/in-svg-in-cdata-ref.html">
</head>
<body>
<svg><![CDATA[<meta charset="windows-1251">]]></svg>
<p>In SVG in CDATA (after head).</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
</head>
<body>
<svg></svg>
<p>In SVG (after head).</p>
<p>Test: ж</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Просмотреть файл

@ -1,10 +0,0 @@
<!DOCTYPE html>
<head>
<link rel="match" href="references/in-svg-ref.html">
</head>
<body>
<svg><meta charset="windows-1251"></svg>
<p>In SVG (after head).</p>
<p>Test: æ</p>
<p>If &#x0436;, meta takes effect</p>
</body>

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше