diff --git a/lighthouse-core/gather/gather-runner.js b/lighthouse-core/gather/gather-runner.js index 13b4947c79..f1a4ba6d0d 100644 --- a/lighthouse-core/gather/gather-runner.js +++ b/lighthouse-core/gather/gather-runner.js @@ -216,6 +216,26 @@ class GatherRunner { return new LHError(LHError.errors.CHROME_INTERSTITIAL_ERROR); } + /** + * Returns an error if we try to load a non-HTML page. + * @param {LH.Artifacts.NetworkRequest|undefined} mainRecord + * @return {LH.LighthouseError|undefined} + */ + static getNonHtmlError(mainRecord) { + // MIME types are case-insensitive but Chrome normalizes MIME types to be lowercase. + const HTML_MIME_TYPE = 'text/html'; + + // If we never requested a document, there's no non-HTML error, let other cases handle it. + if (!mainRecord) return undefined; + + // mimeType is determined by the browser, we assume Chrome is determining mimeType correctly, + // independently of 'Content-Type' response headers, and always sending mimeType if well-formed. + if (HTML_MIME_TYPE !== mainRecord.mimeType) { + return new LHError(LHError.errors.NOT_HTML, {mimeType: mainRecord.mimeType}); + } + return undefined; + } + /** * Returns an error if the page load should be considered failed, e.g. from a * main document request failure, a security issue, etc. @@ -234,6 +254,7 @@ class GatherRunner { const networkError = GatherRunner.getNetworkError(mainRecord); const interstitialError = GatherRunner.getInterstitialError(mainRecord, networkRecords); + const nonHtmlError = GatherRunner.getNonHtmlError(mainRecord); // Check to see if we need to ignore the page load failure. // e.g. When the driver is offline, the load will fail without page offline support. @@ -247,6 +268,9 @@ class GatherRunner { // Example: `DNS_FAILURE` is better than `NO_FCP`. if (networkError) return networkError; + // Error if page is not HTML. + if (nonHtmlError) return nonHtmlError; + // Navigation errors are rather generic and express some failure of the page to render properly. 
// Use `navigationError` as the last resort. // Example: `NO_FCP`, the page never painted content for some unknown reason. diff --git a/lighthouse-core/lib/i18n/locales/en-US.json b/lighthouse-core/lib/i18n/locales/en-US.json index 58b67e8b76..02f2b9d937 100644 --- a/lighthouse-core/lib/i18n/locales/en-US.json +++ b/lighthouse-core/lib/i18n/locales/en-US.json @@ -1607,6 +1607,9 @@ "lighthouse-core/lib/lh-error.js | missingRequiredArtifact": { "message": "Required {artifactName} gatherer did not run." }, + "lighthouse-core/lib/lh-error.js | notHtml": { + "message": "The page provided is not HTML (served as MIME type {mimeType})." + }, "lighthouse-core/lib/lh-error.js | oldChromeDoesNotSupportFeature": { "message": "This version of Chrome is too old to support '{featureName}'. Use a newer version to see full results." }, diff --git a/lighthouse-core/lib/i18n/locales/en-XL.json b/lighthouse-core/lib/i18n/locales/en-XL.json index 3a2bebe6a5..034caa2b5c 100644 --- a/lighthouse-core/lib/i18n/locales/en-XL.json +++ b/lighthouse-core/lib/i18n/locales/en-XL.json @@ -1607,6 +1607,9 @@ "lighthouse-core/lib/lh-error.js | missingRequiredArtifact": { "message": "R̂éq̂úîŕêd́ {artifactName} ĝát̂h́êŕêŕ d̂íd̂ ńôt́ r̂ún̂." }, + "lighthouse-core/lib/lh-error.js | notHtml": { + "message": "T̂h́ê ṕâǵê ṕr̂óv̂íd̂éd̂ íŝ ńôt́ ĤT́M̂Ĺ (ŝér̂v́êd́ âś M̂ÍM̂É t̂ýp̂é {mimeType})." + }, "lighthouse-core/lib/lh-error.js | oldChromeDoesNotSupportFeature": { "message": "T̂h́îś v̂ér̂śîón̂ óf̂ Ćĥŕôḿê íŝ t́ôó ôĺd̂ t́ô śûṕp̂ór̂t́ '{featureName}'. Ûśê á n̂éŵér̂ v́êŕŝíôń t̂ó ŝéê f́ûĺl̂ ŕêśûĺt̂ś." }, diff --git a/lighthouse-core/lib/lh-error.js b/lighthouse-core/lib/lh-error.js index 7f1a5e35a1..ea28f902d9 100644 --- a/lighthouse-core/lib/lh-error.js +++ b/lighthouse-core/lib/lh-error.js @@ -47,6 +47,11 @@ const UIStrings = { internalChromeError: 'An internal Chrome error occurred. 
Please restart Chrome and try re-running Lighthouse.', /** Error message explaining that fetching the resources of the webpage has taken longer than the maximum time. */ requestContentTimeout: 'Fetching resource content has exceeded the allotted time', + /** + * @description Error message explaining that the webpage is non-HTML, so audits are ill-defined. + * @example {application/xml} mimeType + * */ + notHtml: 'The page provided is not HTML (served as MIME type {mimeType}).', /** Error message explaining that the provided URL Lighthouse points to is not valid, and cannot be loaded. */ urlInvalid: 'The URL you have provided appears to be invalid.', /** @@ -322,6 +327,12 @@ const ERRORS = { message: UIStrings.pageLoadFailedHung, lhrRuntimeError: true, }, + /* Used when the page is non-HTML. */ + NOT_HTML: { + code: 'NOT_HTML', + message: UIStrings.notHtml, + lhrRuntimeError: true, + }, // Protocol internal failures TRACING_ALREADY_STARTED: { diff --git a/lighthouse-core/test/gather/gather-runner-test.js b/lighthouse-core/test/gather/gather-runner-test.js index f029a64db8..7b16db09b6 100644 --- a/lighthouse-core/test/gather/gather-runner-test.js +++ b/lighthouse-core/test/gather/gather-runner-test.js @@ -38,6 +38,7 @@ const GatherRunner = { getInstallabilityErrors: makeParamsOptional(GatherRunner_.getInstallabilityErrors), getInterstitialError: makeParamsOptional(GatherRunner_.getInterstitialError), getNetworkError: makeParamsOptional(GatherRunner_.getNetworkError), + getNonHtmlError: makeParamsOptional(GatherRunner_.getNonHtmlError), getPageLoadError: makeParamsOptional(GatherRunner_.getPageLoadError), getWebAppManifest: makeParamsOptional(GatherRunner_.getWebAppManifest), initializeBaseArtifacts: makeParamsOptional(GatherRunner_.initializeBaseArtifacts), @@ -1100,6 +1101,43 @@ describe('GatherRunner', function() { }); }); + describe('#getNonHtmlError', () => { + /** + * @param {NetworkRequest} mainRecord + */ + function getAndExpectError(mainRecord) { + const 
error = GatherRunner.getNonHtmlError(mainRecord); + if (!error) throw new Error('expected a non-HTML error'); + return error; + } + + it('passes when the page was not requested', () => { + expect(GatherRunner.getNonHtmlError(undefined)).toBeUndefined(); + }); + + it('passes when the page is of MIME type text/html', () => { + const url = 'http://the-page.com'; + const mainRecord = new NetworkRequest(); + const mimeType = 'text/html'; + mainRecord.url = url; + mainRecord.mimeType = mimeType; + expect(GatherRunner.getNonHtmlError(mainRecord)).toBeUndefined(); + }); + + it('fails when the page is not of MIME type text/html', () => { + const url = 'http://the-page.com'; + const mimeType = 'application/xml'; + const mainRecord = new NetworkRequest(); + mainRecord.url = url; + mainRecord.mimeType = mimeType; + const error = getAndExpectError(mainRecord); + expect(error.message).toEqual('NOT_HTML'); + expect(error.code).toEqual('NOT_HTML'); + expect(error.friendlyMessage).toBeDisplayString(/is not HTML \(served as/); + }); + }); + + describe('#getPageLoadError', () => { /** * @param {RecursivePartial} passContext @@ -1127,6 +1165,7 @@ describe('GatherRunner', function() { const mainRecord = new NetworkRequest(); const loadData = {networkRecords: [mainRecord]}; mainRecord.url = passContext.url; + mainRecord.mimeType = 'text/html'; const error = GatherRunner.getPageLoadError(passContext, loadData, undefined); expect(error).toBeUndefined(); }); @@ -1139,6 +1178,7 @@ describe('GatherRunner', function() { const mainRecord = new NetworkRequest(); const loadData = {networkRecords: [mainRecord]}; mainRecord.url = 'http://example.com'; + mainRecord.mimeType = 'text/html'; const error = GatherRunner.getPageLoadError(passContext, loadData, undefined); expect(error).toBeUndefined(); }); @@ -1175,7 +1215,7 @@ describe('GatherRunner', function() { expect(error.message).toEqual('CHROME_INTERSTITIAL_ERROR'); }); - it('fails with network error next', () => { + it('fails with network error 
second', () => { const passContext = { url: 'http://the-page.com', passConfig: {loadFailureMode: LoadFailureMode.fatal}, @@ -1190,6 +1230,21 @@ describe('GatherRunner', function() { expect(error.message).toEqual('FAILED_DOCUMENT_REQUEST'); }); + it('fails with non-HTML error third', () => { + const passContext = { + url: 'http://the-page.com', + passConfig: {loadFailureMode: LoadFailureMode.fatal}, + }; + const mainRecord = new NetworkRequest(); + const loadData = {networkRecords: [mainRecord]}; + + mainRecord.url = passContext.url; + mainRecord.mimeType = 'application/xml'; + + const error = getAndExpectError(passContext, loadData, navigationError); + expect(error.message).toEqual('NOT_HTML'); + }); + it('fails with nav error last', () => { const passContext = { url: 'http://the-page.com', @@ -1199,6 +1254,7 @@ describe('GatherRunner', function() { const loadData = {networkRecords: [mainRecord]}; mainRecord.url = passContext.url; + mainRecord.mimeType = 'text/html'; const error = getAndExpectError(passContext, loadData, navigationError); expect(error.message).toEqual('NAVIGATION_ERROR'); @@ -1213,6 +1269,7 @@ describe('GatherRunner', function() { const loadData = {networkRecords: [mainRecord]}; mainRecord.url = passContext.url; + mainRecord.mimeType = 'text/html'; const error = getAndExpectError(passContext, loadData, navigationError); expect(error.message).toEqual('NAVIGATION_ERROR'); diff --git a/proto/lighthouse-result.proto b/proto/lighthouse-result.proto index ece8b4739a..9f1c6ce887 100644 --- a/proto/lighthouse-result.proto +++ b/proto/lighthouse-result.proto @@ -55,6 +55,8 @@ enum LighthouseError { DNS_FAILURE = 19; // A timeout in the initial connection to the debugger protocol. CRI_TIMEOUT = 20; + // The page requested was not HTML. + NOT_HTML = 21; } // The overarching Lighthouse Response object (LHR)