gecko-dev/parser/html/nsHtml5MetaScanner.cpp

822 строки
23 KiB
C++

/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2015 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
* Please edit MetaScanner.java instead and regenerate.
*/
#define nsHtml5MetaScanner_cpp__
#include "nsAtom.h"
#include "nsHtml5AtomTable.h"
#include "nsHtml5String.h"
#include "nsNameSpaceManager.h"
#include "nsIContent.h"
#include "nsTraceRefcnt.h"
#include "jArray.h"
#include "nsHtml5ArrayCopy.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsGkAtoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsHtml5Macros.h"
#include "nsIContentHandle.h"
#include "nsHtml5Portability.h"
#include "nsHtml5ContentCreatorFunction.h"
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5StateSnapshot.h"
#include "nsHtml5Portability.h"
#include "nsHtml5MetaScanner.h"
static char16_t const CHARSET_DATA[] = {'h', 'a', 'r', 's', 'e', 't'};
staticJArray<char16_t, int32_t> nsHtml5MetaScanner::CHARSET = {
CHARSET_DATA, MOZ_ARRAY_LENGTH(CHARSET_DATA)};
static char16_t const CONTENT_DATA[] = {'o', 'n', 't', 'e', 'n', 't'};
staticJArray<char16_t, int32_t> nsHtml5MetaScanner::CONTENT = {
CONTENT_DATA, MOZ_ARRAY_LENGTH(CONTENT_DATA)};
static char16_t const HTTP_EQUIV_DATA[] = {'t', 't', 'p', '-', 'e',
'q', 'u', 'i', 'v'};
staticJArray<char16_t, int32_t> nsHtml5MetaScanner::HTTP_EQUIV = {
HTTP_EQUIV_DATA, MOZ_ARRAY_LENGTH(HTTP_EQUIV_DATA)};
static char16_t const CONTENT_TYPE_DATA[] = {'c', 'o', 'n', 't', 'e', 'n',
't', '-', 't', 'y', 'p', 'e'};
staticJArray<char16_t, int32_t> nsHtml5MetaScanner::CONTENT_TYPE = {
CONTENT_TYPE_DATA, MOZ_ARRAY_LENGTH(CONTENT_TYPE_DATA)};
nsHtml5MetaScanner::nsHtml5MetaScanner(nsHtml5TreeBuilder* tb)
: readable(nullptr),
metaState(NO),
contentIndex(INT32_MAX),
charsetIndex(INT32_MAX),
httpEquivIndex(INT32_MAX),
contentTypeIndex(INT32_MAX),
stateSave(DATA),
strBufLen(0),
strBuf(jArray<char16_t, int32_t>::newJArray(36)),
content(nullptr),
charset(nullptr),
httpEquivState(HTTP_EQUIV_NOT_SEEN),
treeBuilder(tb),
mEncoding(nullptr) {
MOZ_COUNT_CTOR(nsHtml5MetaScanner);
}
nsHtml5MetaScanner::~nsHtml5MetaScanner() {
MOZ_COUNT_DTOR(nsHtml5MetaScanner);
content.Release();
charset.Release();
}
void nsHtml5MetaScanner::stateLoop(int32_t state) {
int32_t c = -1;
bool reconsume = false;
stateloop:
for (;;) {
switch (state) {
case DATA: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '<': {
state = nsHtml5MetaScanner::TAG_OPEN;
NS_HTML5_BREAK(dataloop);
}
default: {
continue;
}
}
}
dataloop_end:;
MOZ_FALLTHROUGH;
}
case TAG_OPEN: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case 'm':
case 'M': {
metaState = M;
state = nsHtml5MetaScanner::TAG_NAME;
NS_HTML5_BREAK(tagopenloop);
}
case '!': {
state = nsHtml5MetaScanner::MARKUP_DECLARATION_OPEN;
NS_HTML5_CONTINUE(stateloop);
}
case '\?':
case '/': {
state = nsHtml5MetaScanner::SCAN_UNTIL_GT;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
metaState = NO;
state = nsHtml5MetaScanner::TAG_NAME;
NS_HTML5_BREAK(tagopenloop);
}
state = nsHtml5MetaScanner::DATA;
reconsume = true;
NS_HTML5_CONTINUE(stateloop);
}
}
}
tagopenloop_end:;
MOZ_FALLTHROUGH;
}
case TAG_NAME: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_NAME;
NS_HTML5_BREAK(tagnameloop);
}
case '/': {
state = nsHtml5MetaScanner::SELF_CLOSING_START_TAG;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
case 'e':
case 'E': {
if (metaState == M) {
metaState = E;
} else {
metaState = NO;
}
continue;
}
case 't':
case 'T': {
if (metaState == E) {
metaState = T;
} else {
metaState = NO;
}
continue;
}
case 'a':
case 'A': {
if (metaState == T) {
metaState = A;
} else {
metaState = NO;
}
continue;
}
default: {
metaState = NO;
continue;
}
}
}
tagnameloop_end:;
MOZ_FALLTHROUGH;
}
case BEFORE_ATTRIBUTE_NAME: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
continue;
}
case '/': {
state = nsHtml5MetaScanner::SELF_CLOSING_START_TAG;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = DATA;
NS_HTML5_CONTINUE(stateloop);
}
case 'c':
case 'C': {
contentIndex = 0;
charsetIndex = 0;
httpEquivIndex = INT32_MAX;
contentTypeIndex = INT32_MAX;
state = nsHtml5MetaScanner::ATTRIBUTE_NAME;
NS_HTML5_BREAK(beforeattributenameloop);
}
case 'h':
case 'H': {
contentIndex = INT32_MAX;
charsetIndex = INT32_MAX;
httpEquivIndex = 0;
contentTypeIndex = INT32_MAX;
state = nsHtml5MetaScanner::ATTRIBUTE_NAME;
NS_HTML5_BREAK(beforeattributenameloop);
}
default: {
contentIndex = INT32_MAX;
charsetIndex = INT32_MAX;
httpEquivIndex = INT32_MAX;
contentTypeIndex = INT32_MAX;
state = nsHtml5MetaScanner::ATTRIBUTE_NAME;
NS_HTML5_BREAK(beforeattributenameloop);
}
}
}
beforeattributenameloop_end:;
MOZ_FALLTHROUGH;
}
case ATTRIBUTE_NAME: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
state = nsHtml5MetaScanner::AFTER_ATTRIBUTE_NAME;
NS_HTML5_CONTINUE(stateloop);
}
case '/': {
state = nsHtml5MetaScanner::SELF_CLOSING_START_TAG;
NS_HTML5_CONTINUE(stateloop);
}
case '=': {
strBufLen = 0;
contentTypeIndex = 0;
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_VALUE;
NS_HTML5_BREAK(attributenameloop);
}
case '>': {
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
if (metaState == A) {
if (c >= 'A' && c <= 'Z') {
c += 0x20;
}
if (contentIndex < CONTENT.length &&
c == CONTENT[contentIndex]) {
++contentIndex;
} else {
contentIndex = INT32_MAX;
}
if (charsetIndex < CHARSET.length &&
c == CHARSET[charsetIndex]) {
++charsetIndex;
} else {
charsetIndex = INT32_MAX;
}
if (httpEquivIndex < HTTP_EQUIV.length &&
c == HTTP_EQUIV[httpEquivIndex]) {
++httpEquivIndex;
} else {
httpEquivIndex = INT32_MAX;
}
}
continue;
}
}
}
attributenameloop_end:;
MOZ_FALLTHROUGH;
}
case BEFORE_ATTRIBUTE_VALUE: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
continue;
}
case '\"': {
state = nsHtml5MetaScanner::ATTRIBUTE_VALUE_DOUBLE_QUOTED;
NS_HTML5_BREAK(beforeattributevalueloop);
}
case '\'': {
state = nsHtml5MetaScanner::ATTRIBUTE_VALUE_SINGLE_QUOTED;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
handleCharInAttributeValue(c);
state = nsHtml5MetaScanner::ATTRIBUTE_VALUE_UNQUOTED;
NS_HTML5_CONTINUE(stateloop);
}
}
}
beforeattributevalueloop_end:;
MOZ_FALLTHROUGH;
}
case ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '\"': {
handleAttributeValue();
state = nsHtml5MetaScanner::AFTER_ATTRIBUTE_VALUE_QUOTED;
NS_HTML5_BREAK(attributevaluedoublequotedloop);
}
default: {
handleCharInAttributeValue(c);
continue;
}
}
}
attributevaluedoublequotedloop_end:;
MOZ_FALLTHROUGH;
}
case AFTER_ATTRIBUTE_VALUE_QUOTED: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_NAME;
NS_HTML5_CONTINUE(stateloop);
}
case '/': {
state = nsHtml5MetaScanner::SELF_CLOSING_START_TAG;
NS_HTML5_BREAK(afterattributevaluequotedloop);
}
case '>': {
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_NAME;
reconsume = true;
NS_HTML5_CONTINUE(stateloop);
}
}
}
afterattributevaluequotedloop_end:;
MOZ_FALLTHROUGH;
}
case SELF_CLOSING_START_TAG: {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '>': {
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_NAME;
reconsume = true;
NS_HTML5_CONTINUE(stateloop);
}
}
}
case ATTRIBUTE_VALUE_UNQUOTED: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
handleAttributeValue();
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_NAME;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
handleAttributeValue();
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
handleCharInAttributeValue(c);
continue;
}
}
}
}
case AFTER_ATTRIBUTE_NAME: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case ' ':
case '\t':
case '\n':
case '\f': {
continue;
}
case '/': {
handleAttributeValue();
state = nsHtml5MetaScanner::SELF_CLOSING_START_TAG;
NS_HTML5_CONTINUE(stateloop);
}
case '=': {
strBufLen = 0;
contentTypeIndex = 0;
state = nsHtml5MetaScanner::BEFORE_ATTRIBUTE_VALUE;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
handleAttributeValue();
if (handleTag()) {
NS_HTML5_BREAK(stateloop);
}
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
case 'c':
case 'C': {
contentIndex = 0;
charsetIndex = 0;
state = nsHtml5MetaScanner::ATTRIBUTE_NAME;
NS_HTML5_CONTINUE(stateloop);
}
default: {
contentIndex = INT32_MAX;
charsetIndex = INT32_MAX;
state = nsHtml5MetaScanner::ATTRIBUTE_NAME;
NS_HTML5_CONTINUE(stateloop);
}
}
}
}
case MARKUP_DECLARATION_OPEN: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::MARKUP_DECLARATION_HYPHEN;
NS_HTML5_BREAK(markupdeclarationopenloop);
}
default: {
state = nsHtml5MetaScanner::SCAN_UNTIL_GT;
reconsume = true;
NS_HTML5_CONTINUE(stateloop);
}
}
}
markupdeclarationopenloop_end:;
MOZ_FALLTHROUGH;
}
case MARKUP_DECLARATION_HYPHEN: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::COMMENT_START;
NS_HTML5_BREAK(markupdeclarationhyphenloop);
}
default: {
state = nsHtml5MetaScanner::SCAN_UNTIL_GT;
reconsume = true;
NS_HTML5_CONTINUE(stateloop);
}
}
}
markupdeclarationhyphenloop_end:;
MOZ_FALLTHROUGH;
}
case COMMENT_START: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::COMMENT_START_DASH;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
state = nsHtml5MetaScanner::COMMENT;
NS_HTML5_BREAK(commentstartloop);
}
}
}
commentstartloop_end:;
MOZ_FALLTHROUGH;
}
case COMMENT: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::COMMENT_END_DASH;
NS_HTML5_BREAK(commentloop);
}
default: {
continue;
}
}
}
commentloop_end:;
MOZ_FALLTHROUGH;
}
case COMMENT_END_DASH: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::COMMENT_END;
NS_HTML5_BREAK(commentenddashloop);
}
default: {
state = nsHtml5MetaScanner::COMMENT;
NS_HTML5_CONTINUE(stateloop);
}
}
}
commentenddashloop_end:;
MOZ_FALLTHROUGH;
}
case COMMENT_END: {
for (;;) {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
case '-': {
continue;
}
default: {
state = nsHtml5MetaScanner::COMMENT;
NS_HTML5_CONTINUE(stateloop);
}
}
}
}
case COMMENT_START_DASH: {
c = read();
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '-': {
state = nsHtml5MetaScanner::COMMENT_END;
NS_HTML5_CONTINUE(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
state = nsHtml5MetaScanner::COMMENT;
NS_HTML5_CONTINUE(stateloop);
}
}
}
case ATTRIBUTE_VALUE_SINGLE_QUOTED: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '\'': {
handleAttributeValue();
state = nsHtml5MetaScanner::AFTER_ATTRIBUTE_VALUE_QUOTED;
NS_HTML5_CONTINUE(stateloop);
}
default: {
handleCharInAttributeValue(c);
continue;
}
}
}
}
case SCAN_UNTIL_GT: {
for (;;) {
if (reconsume) {
reconsume = false;
} else {
c = read();
}
switch (c) {
case -1: {
NS_HTML5_BREAK(stateloop);
}
case '>': {
state = nsHtml5MetaScanner::DATA;
NS_HTML5_CONTINUE(stateloop);
}
default: {
continue;
}
}
}
}
}
}
stateloop_end:;
stateSave = state;
}
void nsHtml5MetaScanner::handleCharInAttributeValue(int32_t c) {
if (metaState == A) {
if (contentIndex == CONTENT.length || charsetIndex == CHARSET.length) {
addToBuffer(c);
} else if (httpEquivIndex == HTTP_EQUIV.length) {
if (contentTypeIndex < CONTENT_TYPE.length &&
toAsciiLowerCase(c) == CONTENT_TYPE[contentTypeIndex]) {
++contentTypeIndex;
} else {
contentTypeIndex = INT32_MAX;
}
}
}
}
void nsHtml5MetaScanner::addToBuffer(int32_t c) {
if (strBufLen == strBuf.length) {
jArray<char16_t, int32_t> newBuf = jArray<char16_t, int32_t>::newJArray(
strBuf.length + (strBuf.length << 1));
nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBuf.length);
strBuf = newBuf;
}
strBuf[strBufLen++] = (char16_t)c;
}
void nsHtml5MetaScanner::handleAttributeValue() {
if (metaState != A) {
return;
}
if (contentIndex == CONTENT.length && !content) {
content = nsHtml5Portability::newStringFromBuffer(strBuf, 0, strBufLen,
treeBuilder, false);
return;
}
if (charsetIndex == CHARSET.length && !charset) {
charset = nsHtml5Portability::newStringFromBuffer(strBuf, 0, strBufLen,
treeBuilder, false);
return;
}
if (httpEquivIndex == HTTP_EQUIV.length &&
httpEquivState == HTTP_EQUIV_NOT_SEEN) {
httpEquivState = (contentTypeIndex == CONTENT_TYPE.length)
? HTTP_EQUIV_CONTENT_TYPE
: HTTP_EQUIV_OTHER;
return;
}
}
bool nsHtml5MetaScanner::handleTag() {
bool stop = handleTagInner();
content.Release();
content = nullptr;
charset.Release();
charset = nullptr;
httpEquivState = HTTP_EQUIV_NOT_SEEN;
return stop;
}
bool nsHtml5MetaScanner::handleTagInner() {
if (!!charset && tryCharset(charset)) {
return true;
}
if (!!content && httpEquivState == HTTP_EQUIV_CONTENT_TYPE) {
nsHtml5String extract =
nsHtml5TreeBuilder::extractCharsetFromContent(content, treeBuilder);
if (!extract) {
return false;
}
bool success = tryCharset(extract);
extract.Release();
return success;
}
return false;
}
void nsHtml5MetaScanner::initializeStatics() {}
void nsHtml5MetaScanner::releaseStatics() {}
#include "nsHtml5MetaScannerCppSupplement.h"