From 96f13c76f9a5a90882839aa78162f6baf71f5ac5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20Wollse=CC=81n?= Date: Fri, 23 Nov 2018 04:02:53 +0200 Subject: [PATCH] Refactoring towards navigation-based batching of openwpm payloads including test skeletons and date utils --- docs/TELEMETRY.md | 4 +- feature.js/StudyPayloadPreprocessor.spec.ts | 964 ++++++++++++++++++++ feature.js/StudyPayloadPreprocessor.ts | 157 ++++ feature.js/dataReceiver.ts | 21 +- feature.js/dateUtils.spec.ts | 12 + feature.js/dateUtils.ts | 6 + feature.js/index.ts | 4 + feature.js/telemetrySender.ts | 143 +-- karma.conf.js | 1 + package-lock.json | 23 +- package.json | 3 +- src/openwpmSetup.js | 2 +- 12 files changed, 1272 insertions(+), 68 deletions(-) create mode 100644 feature.js/StudyPayloadPreprocessor.spec.ts create mode 100644 feature.js/StudyPayloadPreprocessor.ts create mode 100644 feature.js/dateUtils.spec.ts create mode 100644 feature.js/dateUtils.ts diff --git a/docs/TELEMETRY.md b/docs/TELEMETRY.md index c68cc76..3bf9f30 100644 --- a/docs/TELEMETRY.md +++ b/docs/TELEMETRY.md @@ -31,7 +31,7 @@ This study has no surveys and as such has NO SPECIFIC ENDINGS. No user interaction is instrumented in this study. 
-The add-on listens to navigation, web requests, cookie modifications and access to certain javascript API:s, as determined by [openwpm-webext-instrumentation](https://github.com/mozilla/openwpm-webext-instrumentation/tree/refactor-legacy-sdk-code-to-webext-equivalent) ([PR](https://github.com/mozilla/openwpm-webext-instrumentation/pull/7)), using the following configuration: +The add-on listens to navigation, web requests, cookie modifications and access to certain javascript API:s, as determined by [openwpm-webext-instrumentation](https://github.com/mozilla/openwpm-webext-instrumentation/tree/enhancements-batch-2) ([PR](https://github.com/mozilla/openwpm-webext-instrumentation/pull/31)), using the following configuration: ``` navigation_instrument: true, @@ -42,4 +42,4 @@ The add-on listens to navigation, web requests, cookie modifications and access save_all_content: false, ``` -The packets received from the instrumentation are in turn encapsulated in objects of type [`StudyTelemetryPacket`](../feature.js/telemetrySender.ts) and sent as encrypted telemetry packages using [shield-studies-addon-utils](https://github.com/mozilla/shield-studies-addon-utils) ([PR](https://github.com/mozilla/shield-studies-addon-utils/pull/263)). +The packets received from the instrumentation are in turn encapsulated in objects of type [`StudyTelemetryPacket`](../feature.js/StudyPayloadPreprocessor.ts) and sent as encrypted telemetry packages using [shield-studies-addon-utils](https://github.com/mozilla/shield-studies-addon-utils) ([PR](https://github.com/mozilla/shield-studies-addon-utils/pull/263)). 
diff --git a/feature.js/StudyPayloadPreprocessor.spec.ts b/feature.js/StudyPayloadPreprocessor.spec.ts new file mode 100644 index 0000000..a458b1f --- /dev/null +++ b/feature.js/StudyPayloadPreprocessor.spec.ts @@ -0,0 +1,964 @@ +import { assert } from "chai"; +import { + OpenWPMType, + StudyPayloadEnvelope, + StudyPayloadPreprocessor, +} from "./StudyPayloadPreprocessor"; +import { parseIsoDateTimeString } from "./dateUtils"; + +describe("StudyPayloadPreprocessor", function() { + it("should exist", function() { + const studyPayloadPreprocessor = new StudyPayloadPreprocessor(); + assert.isNotEmpty(studyPayloadPreprocessor); + }); + + const firstVisitIsoDateTimeString = "2018-11-23T01:34:40.475Z"; + const secondVisitIsoDateTimeString = "2018-11-23T01:34:45.488Z"; + const firstVisitDateTime = parseIsoDateTimeString( + firstVisitIsoDateTimeString, + ); + const secondVisitDateTime = parseIsoDateTimeString( + secondVisitIsoDateTimeString, + ); + + describe("Example.com visit", function() { + const exampleDotComVisitQueue: StudyPayloadEnvelope[] = [ + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 1, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "20", + url: "http://example.com/", + method: "GET", + time_stamp: "2018-11-23T01:34:40.487Z", + referrer: "", + headers: + '[["Host","example.com"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate"],["Connection","keep-alive"],["Upgrade-Insecure-Requests","1"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "undefined", + loading_origin: "undefined", + loading_href: "undefined", + resource_type: "main_frame", + top_level_url: "about:blank", + parent_frame_id: 
-1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 7750, + }, + { + type: "navigations" as OpenWPMType, + navigation: { + crawl_id: 0, + incognito: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + window_id: 3, + tab_id: 1, + frame_id: 0, + window_width: 1280, + window_height: 946, + window_type: "normal", + tab_width: 1280, + tab_height: 872, + tab_cookie_store_id: "firefox-default", + uuid: "290cb5b2-828c-4eec-9626-69463b7b4d05", + url: "http://example.com/", + transition_qualifiers: '["from_address_bar"]', + transition_type: "typed", + committed_event_ordinal: 2, + committed_time_stamp: "2018-11-23T01:34:40.769Z", + parent_frame_id: -1, + before_navigate_event_ordinal: 0, + before_navigate_time_stamp: "2018-11-23T01:34:40.475Z", + }, + tabActiveDwellTime: 8250, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 3, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "20", + is_cached: 0, + url: "http://example.com/", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:40.765Z", + headers: + '[["Content-Encoding","gzip"],["Accept-Ranges","bytes"],["Cache-Control","max-age=604800"],["Content-Type","text/html; charset=UTF-8"],["Date","Fri, 23 Nov 2018 01:34:40 GMT"],["Etag","\\"1541025663\\""],["Expires","Fri, 30 Nov 2018 01:34:40 GMT"],["Last-Modified","Fri, 09 Aug 2013 23:54:35 GMT"],["Server","ECS (lga/1390)"],["Vary","Accept-Encoding"],["X-Cache","HIT"],["Content-Length","606"]]', + location: "", + }, + tabActiveDwellTime: 8250, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 4, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "21", + url: "http://example.com/favicon.ico", + method: "GET", + 
time_stamp: "2018-11-23T01:34:40.846Z", + referrer: "", + headers: + '[["Host","example.com"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "http://example.com", + loading_origin: "http://example.com", + loading_href: "http://example.com/", + resource_type: "image", + top_level_url: "http://example.com/", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 8250, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 5, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "21", + is_cached: 0, + url: "http://example.com/favicon.ico", + method: "GET", + response_status: 404, + response_status_text: "HTTP/1.1 404 Not Found", + time_stamp: "2018-11-23T01:34:40.982Z", + headers: + '[["Content-Encoding","gzip"],["Accept-Ranges","bytes"],["Cache-Control","max-age=604800"],["Content-Type","text/html; charset=UTF-8"],["Date","Fri, 23 Nov 2018 01:34:40 GMT"],["Expires","Fri, 30 Nov 2018 01:34:40 GMT"],["Last-Modified","Fri, 16 Nov 2018 21:05:05 GMT"],["Server","ECS (lga/1391)"],["Vary","Accept-Encoding"],["X-Cache","404-HIT"],["Content-Length","606"]]', + location: "", + }, + tabActiveDwellTime: 8250, + }, + ]; + const studyPayloadPreprocessor = new StudyPayloadPreprocessor(); + exampleDotComVisitQueue.map( + (studyPayloadEnvelope: StudyPayloadEnvelope) => { + studyPayloadPreprocessor.addToQueue(studyPayloadEnvelope); + }, + ); + + describe("Queue processing 5 seconds after the visit", function() { + const nowIsoDateTimeString = "2018-11-23T01:20:08.260Z"; + const nowDateTime = 
parseIsoDateTimeString(nowIsoDateTimeString); + it("should not yield any navigation batches to send", function() { + studyPayloadPreprocessor.processQueue(); + assert.equal(1, 1); + }); + }); + + describe("Queue processing 20 seconds after the visit", function() { + it("should yield relevant navigation batches to send", function() { + studyPayloadPreprocessor.processQueue(); + assert.equal(1, 1); + }); + }); + }); + + describe("Example.com visit followed by 'More information' link click", function() { + const exampleDotComVisitFollowedByMoreInformationLinkClickQueue = [ + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 1, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "20", + url: "http://example.com/", + method: "GET", + time_stamp: "2018-11-23T01:34:40.487Z", + referrer: "", + headers: + '[["Host","example.com"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate"],["Connection","keep-alive"],["Upgrade-Insecure-Requests","1"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "undefined", + loading_origin: "undefined", + loading_href: "undefined", + resource_type: "main_frame", + top_level_url: "about:blank", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 7750, + }, + { + type: "navigations" as OpenWPMType, + navigation: { + crawl_id: 0, + incognito: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + window_id: 3, + tab_id: 1, + frame_id: 0, + window_width: 1280, + window_height: 946, + window_type: "normal", + tab_width: 1280, + tab_height: 872, + tab_cookie_store_id: "firefox-default", + uuid: "290cb5b2-828c-4eec-9626-69463b7b4d05", + url: 
"http://example.com/", + transition_qualifiers: '["from_address_bar"]', + transition_type: "typed", + committed_event_ordinal: 2, + committed_time_stamp: "2018-11-23T01:34:40.769Z", + parent_frame_id: -1, + before_navigate_event_ordinal: 0, + before_navigate_time_stamp: "2018-11-23T01:34:40.475Z", + }, + tabActiveDwellTime: 8250, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 3, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "20", + is_cached: 0, + url: "http://example.com/", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:40.765Z", + headers: + '[["Content-Encoding","gzip"],["Accept-Ranges","bytes"],["Cache-Control","max-age=604800"],["Content-Type","text/html; charset=UTF-8"],["Date","Fri, 23 Nov 2018 01:34:40 GMT"],["Etag","\\"1541025663\\""],["Expires","Fri, 30 Nov 2018 01:34:40 GMT"],["Last-Modified","Fri, 09 Aug 2013 23:54:35 GMT"],["Server","ECS (lga/1390)"],["Vary","Accept-Encoding"],["X-Cache","HIT"],["Content-Length","606"]]', + location: "", + }, + tabActiveDwellTime: 8250, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 4, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "21", + url: "http://example.com/favicon.ico", + method: "GET", + time_stamp: "2018-11-23T01:34:40.846Z", + referrer: "", + headers: + '[["Host","example.com"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: 
"http://example.com", + loading_origin: "http://example.com", + loading_href: "http://example.com/", + resource_type: "image", + top_level_url: "http://example.com/", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 8250, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 5, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "21", + is_cached: 0, + url: "http://example.com/favicon.ico", + method: "GET", + response_status: 404, + response_status_text: "HTTP/1.1 404 Not Found", + time_stamp: "2018-11-23T01:34:40.982Z", + headers: + '[["Content-Encoding","gzip"],["Accept-Ranges","bytes"],["Cache-Control","max-age=604800"],["Content-Type","text/html; charset=UTF-8"],["Date","Fri, 23 Nov 2018 01:34:40 GMT"],["Expires","Fri, 30 Nov 2018 01:34:40 GMT"],["Last-Modified","Fri, 16 Nov 2018 21:05:05 GMT"],["Server","ECS (lga/1391)"],["Vary","Accept-Encoding"],["X-Cache","404-HIT"],["Content-Length","606"]]', + location: "", + }, + tabActiveDwellTime: 8250, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 7, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "22", + url: "http://www.iana.org/domains/example", + method: "GET", + time_stamp: "2018-11-23T01:34:45.493Z", + referrer: "http://example.com/", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate"],["Referer","http://example.com/"],["Connection","keep-alive"],["Upgrade-Insecure-Requests","1"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: 
"http://example.com", + loading_origin: "undefined", + loading_href: "undefined", + resource_type: "main_frame", + top_level_url: "http://example.com/", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 12750, + }, + { + type: "http_redirects" as OpenWPMType, + httpRedirect: { + incognito: 0, + crawl_id: 0, + old_request_url: "http://www.iana.org/domains/example", + old_request_id: "22", + new_request_url: "https://www.iana.org/domains/reserved", + new_request_id: null, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 8, + window_id: 3, + tab_id: 1, + frame_id: 0, + response_status: 302, + response_status_text: "HTTP/1.1 302 Found", + time_stamp: "2018-11-23T01:34:45.870Z", + }, + tabActiveDwellTime: 13250, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 9, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "22", + url: "https://www.iana.org/domains/reserved", + method: "GET", + time_stamp: "2018-11-23T01:34:45.874Z", + referrer: "http://example.com/", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate, br"],["Referer","http://example.com/"],["Connection","keep-alive"],["Upgrade-Insecure-Requests","1"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "http://example.com", + loading_origin: "undefined", + loading_href: "undefined", + resource_type: "main_frame", + top_level_url: "http://example.com/", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 13250, + }, + { + type: "navigations" as OpenWPMType, + navigation: { + crawl_id: 0, + incognito: 0, + extension_session_uuid: 
"ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + window_id: 3, + tab_id: 1, + frame_id: 0, + window_width: 1280, + window_height: 946, + window_type: "normal", + tab_width: 1280, + tab_height: 872, + tab_cookie_store_id: "firefox-default", + uuid: "bd1e0e8e-6c72-4a93-983b-d59c336b4472", + url: "https://www.iana.org/domains/reserved", + transition_qualifiers: '["server_redirect"]', + transition_type: "link", + committed_event_ordinal: 10, + committed_time_stamp: "2018-11-23T01:34:46.962Z", + parent_frame_id: -1, + before_navigate_event_ordinal: 6, + before_navigate_time_stamp: "2018-11-23T01:34:45.488Z", + }, + tabActiveDwellTime: 14250, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 11, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "24", + url: "https://www.iana.org/_css/2015.1/screen.css", + method: "GET", + time_stamp: "2018-11-23T01:34:46.984Z", + referrer: "https://www.iana.org/domains/reserved", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","text/css,*/*;q=0.1"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate, br"],["Referer","https://www.iana.org/domains/reserved"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "https://www.iana.org", + loading_origin: "https://www.iana.org", + loading_href: "https://www.iana.org/domains/reserved", + resource_type: "stylesheet", + top_level_url: "https://www.iana.org/domains/reserved", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 14250, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 12, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "25", + 
url: "https://www.iana.org/_css/2015.1/print.css", + method: "GET", + time_stamp: "2018-11-23T01:34:46.985Z", + referrer: "https://www.iana.org/domains/reserved", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","text/css,*/*;q=0.1"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate, br"],["Referer","https://www.iana.org/domains/reserved"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "https://www.iana.org", + loading_origin: "https://www.iana.org", + loading_href: "https://www.iana.org/domains/reserved", + resource_type: "stylesheet", + top_level_url: "https://www.iana.org/domains/reserved", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 14250, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 13, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "26", + url: "https://www.iana.org/_js/2013.1/jquery.js", + method: "GET", + time_stamp: "2018-11-23T01:34:46.986Z", + referrer: "https://www.iana.org/domains/reserved", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","*/*"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate, br"],["Referer","https://www.iana.org/domains/reserved"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "https://www.iana.org", + loading_origin: "https://www.iana.org", + loading_href: "https://www.iana.org/domains/reserved", + resource_type: "script", + top_level_url: "https://www.iana.org/domains/reserved", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 14250, + }, + { + type: "http_requests" as 
OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 14, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "27", + url: "https://www.iana.org/_js/2013.1/iana.js", + method: "GET", + time_stamp: "2018-11-23T01:34:46.987Z", + referrer: "https://www.iana.org/domains/reserved", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","*/*"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate, br"],["Referer","https://www.iana.org/domains/reserved"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "https://www.iana.org", + loading_origin: "https://www.iana.org", + loading_href: "https://www.iana.org/domains/reserved", + resource_type: "script", + top_level_url: "https://www.iana.org/domains/reserved", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 14250, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 15, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "28", + url: "https://www.iana.org/_img/2013.1/iana-logo-header.svg", + method: "GET", + time_stamp: "2018-11-23T01:34:47.001Z", + referrer: "https://www.iana.org/domains/reserved", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","image/webp,*/*"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate, br"],["Referer","https://www.iana.org/domains/reserved"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "https://www.iana.org", + loading_origin: "https://www.iana.org", + loading_href: "https://www.iana.org/domains/reserved", 
+ resource_type: "image", + top_level_url: "https://www.iana.org/domains/reserved", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 14250, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 16, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "22", + is_cached: 0, + url: "https://www.iana.org/domains/reserved", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:47.127Z", + headers: + '[["Date","Fri, 23 Nov 2018 01:19:53 GMT"],["X-Frame-Options","SAMEORIGIN"],["Referrer-Policy","origin-when-cross-origin"],["Content-Security-Policy","upgrade-insecure-requests"],["Vary","Accept-Encoding"],["Content-Length","3155"],["Content-Encoding","gzip"],["Last-Modified","Tue, 21 Jul 2015 00:49:48 GMT"],["Cache-control","public, s-maxage=900, max-age=7202"],["Expires","Fri, 23 Nov 2018 03:19:53 GMT"],["Content-Type","text/html; charset=UTF-8"],["Server","Apache"],["Strict-Transport-Security","max-age=48211200; preload"],["X-Cache-Hits","131"],["Accept-Ranges","bytes"],["Connection","keep-alive"]]', + location: "", + }, + tabActiveDwellTime: 14500, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 17, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "24", + is_cached: 0, + url: "https://www.iana.org/_css/2015.1/screen.css", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:47.305Z", + headers: + '[["Date","Fri, 23 Nov 2018 01:15:57 
GMT"],["X-Frame-Options","SAMEORIGIN"],["Referrer-Policy","origin-when-cross-origin"],["Content-Security-Policy","upgrade-insecure-requests"],["Vary","Accept-Encoding"],["Content-Length","9189"],["Content-Encoding","gzip"],["Last-Modified","Thu, 24 May 2018 19:40:12 GMT"],["Cache-control","public, s-maxage=1800, max-age=7205"],["Expires","Fri, 30 Nov 2018 01:15:57 GMT"],["Content-Type","text/css"],["Server","Apache"],["Strict-Transport-Security","max-age=48211200; preload"],["X-Cache-Hits","169"],["Accept-Ranges","bytes"],["Connection","keep-alive"]]', + location: "", + }, + tabActiveDwellTime: 14500, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 18, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "25", + is_cached: 0, + url: "https://www.iana.org/_css/2015.1/print.css", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:47.554Z", + headers: + '[["Date","Fri, 23 Nov 2018 01:13:22 GMT"],["X-Frame-Options","SAMEORIGIN"],["Referrer-Policy","origin-when-cross-origin"],["Content-Security-Policy","upgrade-insecure-requests"],["Vary","Accept-Encoding"],["Content-Length","7661"],["Content-Encoding","gzip"],["Last-Modified","Sat, 01 Oct 2016 16:44:02 GMT"],["Cache-control","public, s-maxage=1800, max-age=7205"],["Expires","Fri, 30 Nov 2018 01:13:22 GMT"],["Content-Type","text/css"],["Server","Apache"],["Strict-Transport-Security","max-age=48211200; preload"],["X-Cache-Hits","173"],["Accept-Ranges","bytes"],["Connection","keep-alive"]]', + location: "", + }, + tabActiveDwellTime: 14750, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 19, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "27", + is_cached: 0, + url: 
"https://www.iana.org/_js/2013.1/iana.js", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:47.576Z", + headers: + '[["Date","Thu, 22 Nov 2018 09:27:09 GMT"],["Expires","Thu, 29 Nov 2018 09:27:09 GMT"],["Vary","Accept-Encoding"],["X-Frame-Options","SAMEORIGIN"],["Referrer-Policy","origin-when-cross-origin"],["Content-Security-Policy","upgrade-insecure-requests"],["Last-Modified","Mon, 08 Aug 2016 18:14:43 GMT"],["Cache-control","public, s-maxage=1800, max-age=7205"],["Content-Encoding","gzip"],["Content-Type","application/javascript"],["Server","Apache"],["Strict-Transport-Security","max-age=48211200; preload"],["X-Cache-Hits","13770"],["Accept-Ranges","bytes"],["Content-Length","81"],["Connection","keep-alive"]]', + location: "", + }, + tabActiveDwellTime: 14750, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 20, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "26", + is_cached: 0, + url: "https://www.iana.org/_js/2013.1/jquery.js", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:47.726Z", + headers: + '[["Date","Fri, 23 Nov 2018 01:17:14 GMT"],["X-Frame-Options","SAMEORIGIN"],["Referrer-Policy","origin-when-cross-origin"],["Content-Security-Policy","upgrade-insecure-requests"],["Vary","Accept-Encoding"],["Content-Length","32980"],["Content-Encoding","gzip"],["Last-Modified","Mon, 22 Apr 2013 18:18:55 GMT"],["Cache-control","public, s-maxage=1800, max-age=7205"],["Expires","Fri, 30 Nov 2018 01:17:14 GMT"],["Content-Type","application/javascript"],["Server","Apache"],["Strict-Transport-Security","max-age=48211200; preload"],["X-Cache-Hits","176"],["Accept-Ranges","bytes"],["Connection","keep-alive"]]', + location: "", + }, + tabActiveDwellTime: 15000, + }, + { + type: "http_requests" as 
OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 21, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "29", + url: "https://www.iana.org/_img/2015.1/fonts/NotoSans-Regular.woff", + method: "GET", + time_stamp: "2018-11-23T01:34:47.779Z", + referrer: "https://www.iana.org/_css/2015.1/screen.css", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","application/font-woff2;q=1.0,application/font-woff;q=0.9,*/*;q=0.8"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate, br"],["Referer","https://www.iana.org/_css/2015.1/screen.css"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "https://www.iana.org", + loading_origin: "https://www.iana.org", + loading_href: "https://www.iana.org/domains/reserved", + resource_type: "font", + top_level_url: "https://www.iana.org/domains/reserved", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 15000, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 22, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "30", + url: "https://www.iana.org/_img/2015.1/fonts/NotoSans-Bold.woff", + method: "GET", + time_stamp: "2018-11-23T01:34:47.789Z", + referrer: "https://www.iana.org/_css/2015.1/screen.css", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","application/font-woff2;q=1.0,application/font-woff;q=0.9,*/*;q=0.8"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate, br"],["Referer","https://www.iana.org/_css/2015.1/screen.css"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + 
is_frame_load: 0, + triggering_origin: "https://www.iana.org", + loading_origin: "https://www.iana.org", + loading_href: "https://www.iana.org/domains/reserved", + resource_type: "font", + top_level_url: "https://www.iana.org/domains/reserved", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 15000, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 23, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "31", + url: + "https://www.iana.org/_img/2015.1/fonts/SourceCodePro-Regular.woff", + method: "GET", + time_stamp: "2018-11-23T01:34:47.794Z", + referrer: "https://www.iana.org/_css/2015.1/screen.css", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","application/font-woff2;q=1.0,application/font-woff;q=0.9,*/*;q=0.8"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate, br"],["Referer","https://www.iana.org/_css/2015.1/screen.css"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "https://www.iana.org", + loading_origin: "https://www.iana.org", + loading_href: "https://www.iana.org/domains/reserved", + resource_type: "font", + top_level_url: "https://www.iana.org/domains/reserved", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 15000, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 24, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "28", + is_cached: 0, + url: "https://www.iana.org/_img/2013.1/iana-logo-header.svg", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:48.073Z", + headers: + 
'[["Date","Thu, 22 Nov 2018 10:12:40 GMT"],["Last-Modified","Fri, 04 Jan 2013 01:17:14 GMT"],["Vary","Accept-Encoding"],["Expires","Thu, 29 Nov 2018 10:12:40 GMT"],["X-Frame-Options","SAMEORIGIN"],["Referrer-Policy","origin-when-cross-origin"],["Content-Security-Policy","upgrade-insecure-requests"],["Content-Length","32870"],["Content-Type","image/svg+xml"],["Server","Apache"],["Strict-Transport-Security","max-age=48211200; preload"],["X-Cache-Hits","10380"],["Accept-Ranges","bytes"],["Connection","keep-alive"]]', + location: "", + }, + tabActiveDwellTime: 15250, + }, + { + type: "http_requests" as OpenWPMType, + httpRequest: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 25, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "32", + url: "https://www.iana.org/_img/bookmark_icon.ico", + method: "GET", + time_stamp: "2018-11-23T01:34:48.375Z", + referrer: "", + headers: + '[["Host","www.iana.org"],["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"],["Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"],["Accept-Language","en-US,en;q=0.5"],["Accept-Encoding","gzip, deflate, br"],["Connection","keep-alive"]]', + is_XHR: 0, + is_full_page: 1, + is_frame_load: 0, + triggering_origin: "https://www.iana.org", + loading_origin: "https://www.iana.org", + loading_href: "https://www.iana.org/domains/reserved", + resource_type: "image", + top_level_url: "https://www.iana.org/domains/reserved", + parent_frame_id: -1, + frame_ancestors: "[]", + }, + tabActiveDwellTime: 15500, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 26, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "31", + is_cached: 0, + url: + "https://www.iana.org/_img/2015.1/fonts/SourceCodePro-Regular.woff", + method: 
"GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:48.500Z", + headers: + '[["Date","Thu, 22 Nov 2018 16:29:13 GMT"],["Last-Modified","Wed, 26 Aug 2015 00:25:13 GMT"],["Vary","Accept-Encoding"],["Expires","Thu, 29 Nov 2018 16:29:13 GMT"],["X-Frame-Options","SAMEORIGIN"],["Referrer-Policy","origin-when-cross-origin"],["Content-Security-Policy","upgrade-insecure-requests"],["Content-Length","89024"],["Content-Type","application/font-woff"],["Server","Apache"],["Strict-Transport-Security","max-age=48211200; preload"],["X-Cache-Hits","6467"],["Accept-Ranges","bytes"],["Connection","keep-alive"]]', + location: "", + }, + tabActiveDwellTime: 15750, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 27, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "29", + is_cached: 0, + url: "https://www.iana.org/_img/2015.1/fonts/NotoSans-Regular.woff", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:48.657Z", + headers: + '[["Date","Thu, 22 Nov 2018 10:12:51 GMT"],["Last-Modified","Wed, 26 Aug 2015 00:25:13 GMT"],["Vary","Accept-Encoding"],["Expires","Thu, 29 Nov 2018 10:12:51 GMT"],["X-Frame-Options","SAMEORIGIN"],["Referrer-Policy","origin-when-cross-origin"],["Content-Security-Policy","upgrade-insecure-requests"],["Content-Length","157504"],["Content-Type","application/font-woff"],["Server","Apache"],["Strict-Transport-Security","max-age=48211200; preload"],["X-Cache-Hits","9494"],["Accept-Ranges","bytes"],["Connection","keep-alive"]]', + location: "", + }, + tabActiveDwellTime: 16000, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 28, + window_id: 3, + tab_id: 1, + frame_id: 0, + 
request_id: "30", + is_cached: 0, + url: "https://www.iana.org/_img/2015.1/fonts/NotoSans-Bold.woff", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:48.670Z", + headers: + '[["Date","Thu, 22 Nov 2018 18:11:54 GMT"],["Expires","Thu, 29 Nov 2018 18:11:54 GMT"],["Vary","Accept-Encoding"],["X-Frame-Options","SAMEORIGIN"],["Referrer-Policy","origin-when-cross-origin"],["Content-Security-Policy","upgrade-insecure-requests"],["Last-Modified","Wed, 26 Aug 2015 00:25:13 GMT"],["Cache-control","public, s-maxage=1800, max-age=7205"],["Content-Type","application/font-woff"],["Server","Apache"],["Strict-Transport-Security","max-age=48211200; preload"],["X-Cache-Hits","5123"],["Accept-Ranges","bytes"],["Content-Length","156596"],["Connection","keep-alive"]]', + location: "", + }, + tabActiveDwellTime: 16000, + }, + { + type: "http_responses" as OpenWPMType, + httpResponse: { + incognito: 0, + crawl_id: 0, + extension_session_uuid: "ec32bcbd-7fee-4aaf-b36d-0ef56557e4fd", + event_ordinal: 29, + window_id: 3, + tab_id: 1, + frame_id: 0, + request_id: "32", + is_cached: 0, + url: "https://www.iana.org/_img/bookmark_icon.ico", + method: "GET", + response_status: 200, + response_status_text: "HTTP/1.1 200 OK", + time_stamp: "2018-11-23T01:34:48.729Z", + headers: + '[["Date","Fri, 23 Nov 2018 01:17:38 GMT"],["X-Frame-Options","SAMEORIGIN"],["Referrer-Policy","origin-when-cross-origin"],["Content-Security-Policy","upgrade-insecure-requests"],["Vary","Accept-Encoding"],["Content-Length","4426"],["Content-Encoding","gzip"],["Last-Modified","Fri, 04 Jan 2013 01:17:14 GMT"],["Cache-control","public, s-maxage=1800, max-age=7205"],["Expires","Fri, 30 Nov 2018 01:17:38 GMT"],["Content-Type","image/vnd.microsoft.icon"],["Server","Apache"],["Strict-Transport-Security","max-age=48211200; preload"],["X-Cache-Hits","108"],["Accept-Ranges","bytes"],["Connection","keep-alive"]]', + location: "", + }, + tabActiveDwellTime: 16000, + 
}, + ]; + const studyPayloadPreprocessor = new StudyPayloadPreprocessor(); + exampleDotComVisitFollowedByMoreInformationLinkClickQueue.map( + (studyPayloadEnvelope: StudyPayloadEnvelope) => { + studyPayloadPreprocessor.addToQueue(studyPayloadEnvelope); + }, + ); + + describe("Queue processing 5 seconds after the first visit (around the time of the second visit)", function() { + it("should not yield any navigation batches to send", function() { + studyPayloadPreprocessor.processQueue(); + assert.equal(1, 1); + }); + }); + + describe("Subsequent queue processing 12 seconds after the visit (around 7 seconds after the second visit)", function() { + it("should yield relevant navigation batches to send", function() { + studyPayloadPreprocessor.processQueue(); + assert.equal(1, 1); + }); + }); + + describe("Subsequent queue processing 17 seconds after the visit (around 12 seconds after the second visit)", function() { + it("should yield relevant navigation batches to send", function() { + studyPayloadPreprocessor.processQueue(); + assert.equal(1, 1); + }); + }); + + describe("Subsequent queue processing 25 seconds after the visit (around 20 seconds after the second visit)", function() { + it("should yield relevant navigation batches to send", function() { + studyPayloadPreprocessor.processQueue(); + assert.equal(1, 1); + }); + }); + }); +}); diff --git a/feature.js/StudyPayloadPreprocessor.ts b/feature.js/StudyPayloadPreprocessor.ts new file mode 100644 index 0000000..f9702f1 --- /dev/null +++ b/feature.js/StudyPayloadPreprocessor.ts @@ -0,0 +1,157 @@ +import { + HttpRedirect, + HttpRequest, + HttpResponse, + JavascriptCookieRecord, + JavascriptOperation, + Navigation, + dateTimeUnicodeFormatString, +} from "openwpm-webext-instrumentation"; +import { CapturedContent, LogEntry } from "./dataReceiver"; +import { parse } from "date-fns"; + +declare namespace browser.alarms { + function create( + name: string, + alarmInfo: { + /** Time when the alarm is scheduled to first 
fire, in milliseconds past the epoch. */ + when?: number; + /** Number of minutes from the current time after which the alarm should first fire. */ + delayInMinutes?: number; + /** Number of minutes after which the alarm should recur repeatedly. */ + periodInMinutes?: number; + }, + ): void; + function clear(name: string): boolean; +} + +declare namespace browser.alarms.onAlarm { + function addListener(listener: any); + function removeListener(listener: any); +} + +declare namespace browser.runtime { + const id: any; +} + +declare namespace browser.privacyContext { + function aPrivateBrowserWindowIsOpen(): boolean; +} + +export interface NavigationBatch { + navigation: Navigation; + httpRequests: HttpRequest[]; + httpResponses: HttpResponse[]; + httpRedirects: HttpRedirect[]; + javascriptOperations: JavascriptOperation[]; + originalHttpRequestCount: number; + originalHttpResponseCount: number; + originalHttpRedirectCount: number; + originalJavascriptOperationCount: number; +} + +export type OpenWPMType = + | "navigations" + | "navigation_batches" + | "http_requests" + | "http_responses" + | "http_redirects" + | "javascript" + | "javascript_cookies" + | "openwpm_log" + | "openwpm_captured_content"; + +/** + * The basic packet structure and target for study analysis + */ +export interface StudyPayloadEnvelope { + type: OpenWPMType; + navigation?: Navigation; + navigationBatch?: NavigationBatch; + httpRequest?: HttpRequest; + httpResponse?: HttpResponse; + httpRedirect?: HttpRedirect; + javascriptOperation?: JavascriptOperation; + javascriptCookieRecord?: JavascriptCookieRecord; + logEntry?: LogEntry; + capturedContent?: CapturedContent; + tabActiveDwellTime?: number; +} + +/** + * Additional fields are relevant at the study telemetry packet level + * since we drop the type-specific payload attributes if the calculatedPingSize + * exceeds a certain threshold + */ +export interface StudyTelemetryPacket extends StudyPayloadEnvelope { + calculatedPingSize: string; + 
calculatedPingSizeOverThreshold: number; +} + +export class StudyPayloadPreprocessor { + public studyPayloadEnvelopeQueue: StudyPayloadEnvelope[] = []; + public addToQueue(studyPayloadEnvelope: StudyPayloadEnvelope) { + this.studyPayloadEnvelopeQueue.push(studyPayloadEnvelope); + } + private alarmName: string; + + public async run() { + this.alarmName = `${browser.runtime.id}:queueProcessorAlarm`; + const alarmListener = async _alarm => { + if (await browser.privacyContext.aPrivateBrowserWindowIsOpen()) { + // do not process the batch queue right now (will attempt again at next alarm interval) + return; + } + this.processQueue(); + }; + browser.alarms.onAlarm.addListener(alarmListener); + browser.alarms.create(this.alarmName, { + periodInMinutes: 10 / 60, // every 10 seconds + }); + } + + public async cleanup() { + if (this.alarmName) { + await browser.alarms.clear(this.alarmName); + } + } + + /** + * Intended to remove study payload envelopes from the queue, grouped by their presumed + * originating web navigations (work in progress: currently only inspects and logs the queue) + * @param nowDateTime Reference time for the processing pass (not yet read by the implementation) + */ + public async processQueue(nowDateTime: Date = new Date()) { + const webNavigationStudyPayloadEnvelopes = this.studyPayloadEnvelopeQueue.filter( + (studyPayloadEnvelope: StudyPayloadEnvelope) => { + return studyPayloadEnvelope.type === "navigations"; + }, + ); + + const webNavigationStudyPayloadEnvelopesToSubmit = webNavigationStudyPayloadEnvelopes.filter( + (studyPayloadEnvelope: StudyPayloadEnvelope) => { + const navigation = studyPayloadEnvelope.navigation; + const committedDateTime = parse( + navigation.committed_time_stamp, + dateTimeUnicodeFormatString, + new Date(), + ); + console.log( + "committedDateTime", + committedDateTime, + ); + return true; + }, + ); + + console.log( + "TODO processQueue", + this.studyPayloadEnvelopeQueue.length, + webNavigationStudyPayloadEnvelopes.length, + // this.studyPayloadEnvelopeQueue, + // webNavigationStudyPayloadEnvelopesToSubmit, + // JSON.stringify(this.studyPayloadEnvelopeQueue), + ); + + } 
+} diff --git a/feature.js/dataReceiver.ts b/feature.js/dataReceiver.ts index 62d44fb..2411ce7 100644 --- a/feature.js/dataReceiver.ts +++ b/feature.js/dataReceiver.ts @@ -1,11 +1,18 @@ -import { telemetrySender } from "./telemetrySender"; import { humanFileSize } from "./humanFileSize"; import { ActiveTabDwellTimeMonitor } from "./ActiveTabDwellTimeMonitor"; +import { StudyPayloadPreprocessor } from "./StudyPayloadPreprocessor"; +import { TelemetrySender } from "./TelemetrySender"; // Export active dwell time monitor singleton // (used to annotate received tab-relevant data packets) export const activeTabDwellTimeMonitor = new ActiveTabDwellTimeMonitor(); +// Setup study payload processor singleton +export const studyPayloadPreprocessor = new StudyPayloadPreprocessor(); + +// Setup telemetry sender singleton +const telemetrySender = new TelemetrySender(studyPayloadPreprocessor); + declare namespace browser.study { const logger: any; } @@ -44,7 +51,7 @@ export const logInfo = async function(msg) { const level = "info"; const logEntry: LogEntry = { level, msg }; await browser.study.logger.log(`OpenWPM INFO log message: ${msg}`); - await telemetrySender.submitOpenWPMPacketToTelemetry("openwpm_log", logEntry); + await telemetrySender.submitOpenWPMPayload("openwpm_log", logEntry); }; export const logWarn = async function(msg) { @@ -54,7 +61,7 @@ export const logWarn = async function(msg) { const level = "warn"; const logEntry: LogEntry = { level, msg }; await browser.study.logger.warn(`OpenWPM WARN log message: ${msg}`); - await telemetrySender.submitOpenWPMPacketToTelemetry("openwpm_log", logEntry); + await telemetrySender.submitOpenWPMPayload("openwpm_log", logEntry); }; export const logError = async function(msg) { @@ -64,7 +71,7 @@ export const logError = async function(msg) { const level = "error"; const logEntry: LogEntry = { level, msg }; await browser.study.logger.error(`OpenWPM ERROR log message: ${msg}`); - await 
telemetrySender.submitOpenWPMPacketToTelemetry("openwpm_log", logEntry); + await telemetrySender.submitOpenWPMPayload("openwpm_log", logEntry); }; export const logCritical = async function(msg) { @@ -74,7 +81,7 @@ export const logCritical = async function(msg) { const level = "critical"; const logEntry: LogEntry = { level, msg }; await browser.study.logger.error(`OpenWPM CRITICAL log message: ${msg}`); - await telemetrySender.submitOpenWPMPacketToTelemetry("openwpm_log", logEntry); + await telemetrySender.submitOpenWPMPayload("openwpm_log", logEntry); }; export const saveRecord = async function(instrument, record) { @@ -94,7 +101,7 @@ export const saveRecord = async function(instrument, record) { record.tab_id, ); } - await telemetrySender.submitOpenWPMPacketToTelemetry( + await telemetrySender.submitOpenWPMPayload( instrument, record, tabActiveDwellTime, @@ -114,7 +121,7 @@ export const saveContent = async function(content, contentHash) { content, contentHash, }; - await telemetrySender.submitOpenWPMPacketToTelemetry( + await telemetrySender.submitOpenWPMPayload( "openwpm_captured_content", capturedContent, ); diff --git a/feature.js/dateUtils.spec.ts b/feature.js/dateUtils.spec.ts new file mode 100644 index 0000000..66075c2 --- /dev/null +++ b/feature.js/dateUtils.spec.ts @@ -0,0 +1,12 @@ +import { assert } from "chai"; +import { parseIsoDateTimeString } from "./dateUtils"; + +describe("dateUtils", function() { + describe("parseIsoDateTimeString", function() { + it("ISO date time strings should be parsable", function() { + const dateTimeIsoString = "2018-11-22T23:13:05.622Z"; + const parsedDateTime = parseIsoDateTimeString(dateTimeIsoString); + assert.equal(parsedDateTime.toISOString(), dateTimeIsoString); + }); + }); +}); diff --git a/feature.js/dateUtils.ts b/feature.js/dateUtils.ts new file mode 100644 index 0000000..205e331 --- /dev/null +++ b/feature.js/dateUtils.ts @@ -0,0 +1,6 @@ +import { parse } from "date-fns"; +import { dateTimeUnicodeFormatString } 
from "openwpm-webext-instrumentation"; + +export const parseIsoDateTimeString = isoDateTimeString => { + return parse(isoDateTimeString, dateTimeUnicodeFormatString, new Date()); +}; diff --git a/feature.js/index.ts b/feature.js/index.ts index 12b1e8e..2fe98fc 100644 --- a/feature.js/index.ts +++ b/feature.js/index.ts @@ -83,6 +83,7 @@ class Feature { async startOpenWPMInstrumentation(config, isFirstRun) { dataReceiver.activeTabDwellTimeMonitor.run(); + dataReceiver.studyPayloadPreprocessor.run(); this.openwpmCrawlId = config["crawl_id"]; if (config["navigation_instrument"]) { this.navigationInstrument = new NavigationInstrument(dataReceiver); @@ -138,6 +139,9 @@ class Feature { if (dataReceiver.activeTabDwellTimeMonitor) { dataReceiver.activeTabDwellTimeMonitor.cleanup(); } + if (dataReceiver.studyPayloadPreprocessor) { + dataReceiver.studyPayloadPreprocessor.cleanup(); + } } } diff --git a/feature.js/telemetrySender.ts b/feature.js/telemetrySender.ts index f61a92c..c6b39a7 100644 --- a/feature.js/telemetrySender.ts +++ b/feature.js/telemetrySender.ts @@ -2,7 +2,9 @@ import { humanFileSize } from "./humanFileSize"; declare namespace browser.study { const logger: any; + function calculateTelemetryPingSize(payload: any); + function sendTelemetry(payload: any); } @@ -11,66 +13,88 @@ declare namespace browser.privacyContext { } import { - Navigation, - HttpRequest, - HttpResponse, - HttpRedirect, - JavascriptOperation, - JavascriptCookieRecord, -} from "openwpm-webext-instrumentation"; -import { CapturedContent, LogEntry } from "./dataReceiver"; + OpenWPMType, + StudyPayloadEnvelope, + StudyPayloadPreprocessor, + StudyTelemetryPacket, +} from "./studyPayloadPreprocessor"; -type OpenWPMType = - | "navigations" - | "http_requests" - | "http_responses" - | "http_redirects" - | "javascript" - | "javascript_cookies" - | "openwpm_log" - | "openwpm_captured_content"; -type OpenWPMPayload = - | Navigation - | HttpRequest - | HttpResponse - | HttpRedirect - | 
JavascriptOperation - | JavascriptCookieRecord - | LogEntry - | CapturedContent; - -interface StudyTelemetryPacket { - type: OpenWPMType; - payload: OpenWPMPayload; - calculatedPingSize: string; - calculatedPingSizeOverThreshold: number; - tabActiveDwellTime?: number; -} - -interface StringifiedStudyTelemetryPacket { +/** + * Shield utils schema requires all study telemetry packet + * attributes to be strings + */ +export interface StringifiedStudyTelemetryPacket { type?: string; - payload?: string; + navigation?: string; + navigationBatch?: string; + httpRequest?: string; + httpResponse?: string; + httpRedirect?: string; + javascriptOperation?: string; + javascriptCookieRecord?: string; + logEntry?: string; + capturedContent?: string; calculatedPingSize?: string; calculatedPingSizeOverThreshold?: string; tabActiveDwellTime?: string; } export class TelemetrySender { - async submitOpenWPMPacketToTelemetry( - type, - payload, + private studyPayloadPreprocessor: StudyPayloadPreprocessor; + + constructor(studyPayloadPreprocessor: StudyPayloadPreprocessor) { + this.studyPayloadPreprocessor = studyPayloadPreprocessor; + } + + async submitOpenWPMPayload( + type: OpenWPMType, + payload: any, tabActiveDwellTime: number = null, ) { if (await browser.privacyContext.aPrivateBrowserWindowIsOpen()) { // drop the ping - do not send any telemetry return; } - const studyTelemetryPacket: StudyTelemetryPacket = { + + const studyPayloadEnvelope: StudyPayloadEnvelope = { type, - payload, + navigation: type === "navigations" ? payload : undefined, + navigationBatch: type === "navigation_batches" ? payload : undefined, + httpRequest: type === "http_requests" ? payload : undefined, + httpResponse: type === "http_responses" ? payload : undefined, + httpRedirect: type === "http_redirects" ? payload : undefined, + javascriptOperation: type === "javascript" ? payload : undefined, + javascriptCookieRecord: + type === "javascript_cookies" ? 
payload : undefined, + logEntry: type === "openwpm_log" ? payload : undefined, + capturedContent: + type === "openwpm_captured_content" ? payload : undefined, + tabActiveDwellTime, + }; + + // Any http or javascript packet with tabId is sent for batching by corresponding navigation + // or dropped (if no corresponding navigation showed up) + if ( + [ + "navigations", + "http_requests", + "http_responses", + "http_redirects", + "javascript", + ].includes(type) && + payload.extension_session_uuid && + payload.window_id > -1 && + payload.tab_id > -1 && + payload.frame_id > -1 + ) { + this.studyPayloadPreprocessor.addToQueue(studyPayloadEnvelope); + return; + } + + const studyTelemetryPacket: StudyTelemetryPacket = { + ...studyPayloadEnvelope, calculatedPingSize: "0000000000", // Will be replaced below with the real (approximate) calculated ping size calculatedPingSizeOverThreshold: 0, - tabActiveDwellTime: tabActiveDwellTime, }; const stringifiedStudyTelemetryPacket: StringifiedStudyTelemetryPacket = this.stringifyPayload( studyTelemetryPacket, @@ -86,16 +110,28 @@ export class TelemetrySender { ): StringifiedStudyTelemetryPacket { return { type: JSON.stringify(studyTelemetryPacket.type), - payload: JSON.stringify(studyTelemetryPacket.payload), + navigation: JSON.stringify(studyTelemetryPacket.navigation), + navigationBatch: JSON.stringify(studyTelemetryPacket.navigationBatch), + httpRequest: JSON.stringify(studyTelemetryPacket.httpRequest), + httpResponse: JSON.stringify(studyTelemetryPacket.httpResponse), + httpRedirect: JSON.stringify(studyTelemetryPacket.httpRedirect), + javascriptOperation: JSON.stringify( + studyTelemetryPacket.javascriptOperation, + ), + javascriptCookieRecord: JSON.stringify( + studyTelemetryPacket.javascriptCookieRecord, + ), + logEntry: JSON.stringify(studyTelemetryPacket.logEntry), + capturedContent: JSON.stringify(studyTelemetryPacket.capturedContent), + tabActiveDwellTime: JSON.stringify( + studyTelemetryPacket.tabActiveDwellTime, + ), 
calculatedPingSize: JSON.stringify( studyTelemetryPacket.calculatedPingSize, ), calculatedPingSizeOverThreshold: JSON.stringify( studyTelemetryPacket.calculatedPingSizeOverThreshold, ), - tabActiveDwellTime: JSON.stringify( - studyTelemetryPacket.tabActiveDwellTime, - ), }; } @@ -113,7 +149,15 @@ export class TelemetrySender { } ping which is being submitted: ${humanFileSize(calculatedPingSize)}`; if (calculatedPingSize > 1024 * 500) { await browser.study.logger.log(logMessage); - delete stringifiedStudyTelemetryPacket.payload; + delete stringifiedStudyTelemetryPacket.navigation; + delete stringifiedStudyTelemetryPacket.navigationBatch; + delete stringifiedStudyTelemetryPacket.httpRequest; + delete stringifiedStudyTelemetryPacket.httpResponse; + delete stringifiedStudyTelemetryPacket.httpRedirect; + delete stringifiedStudyTelemetryPacket.javascriptOperation; + delete stringifiedStudyTelemetryPacket.javascriptCookieRecord; + delete stringifiedStudyTelemetryPacket.logEntry; + delete stringifiedStudyTelemetryPacket.capturedContent; stringifiedStudyTelemetryPacket.calculatedPingSizeOverThreshold = "1"; await browser.study.logger.log( "Calculated ping size over 500kb - OpenWPM payload dropped", @@ -132,6 +176,3 @@ export class TelemetrySender { ); } } - -// export a singleton -export const telemetrySender = new TelemetrySender(); diff --git a/karma.conf.js b/karma.conf.js index ffbebef..e2d55bd 100644 --- a/karma.conf.js +++ b/karma.conf.js @@ -40,6 +40,7 @@ module.exports = function(config) { "node_modules/sinon/pkg/sinon.js", "node_modules/sinon-chrome/bundle/sinon-chrome.min.js", "feature.js/index.ts", + "feature.js/*.spec.ts", "test/unit/*.spec.js", ], webpack: webpackConfig, diff --git a/package-lock.json b/package-lock.json index fed8952..99fa41e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3675,6 +3675,12 @@ "integrity": "sha1-nfflL7Kgyw+4kFjugMMQQiXzfh0=", "dev": true }, + "date-fns": { + "version": "1.29.0", + "resolved": 
"https://registry.npmjs.org/date-fns/-/date-fns-1.29.0.tgz", + "integrity": "sha512-lbTXWZ6M20cWH8N9S6afb0SBm6tMk+uUg6z3MqHPKE9atmsY3kJkTm8vKe93izJ2B2+q5MV990sM2CHgtAZaOw==", + "dev": true + }, "load-json-file": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", @@ -4102,10 +4108,9 @@ "dev": true }, "date-fns": { - "version": "1.29.0", - "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-1.29.0.tgz", - "integrity": "sha512-lbTXWZ6M20cWH8N9S6afb0SBm6tMk+uUg6z3MqHPKE9atmsY3kJkTm8vKe93izJ2B2+q5MV990sM2CHgtAZaOw==", - "dev": true + "version": "2.0.0-alpha.25", + "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-2.0.0-alpha.25.tgz", + "integrity": "sha512-iQzJkHF0L4wah9Ae9PkvwemwFz6qmRLuNZcghmvf2t+ptLs1qXzONLiGtjmPQzL6+JpC01JjlTopY2AEy4NFAg==" }, "date-format": { "version": "1.2.0", @@ -10028,6 +10033,12 @@ "restore-cursor": "^1.0.1" } }, + "date-fns": { + "version": "1.29.0", + "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-1.29.0.tgz", + "integrity": "sha512-lbTXWZ6M20cWH8N9S6afb0SBm6tMk+uUg6z3MqHPKE9atmsY3kJkTm8vKe93izJ2B2+q5MV990sM2CHgtAZaOw==", + "dev": true + }, "figures": { "version": "1.7.0", "resolved": "https://registry.npmjs.org/figures/-/figures-1.7.0.tgz", @@ -12008,8 +12019,8 @@ } }, "openwpm-webext-instrumentation": { - "version": "github:mozilla/openwpm-webext-instrumentation#5575d378b778af958be0220754b20ed7a0879c90", - "from": "github:mozilla/openwpm-webext-instrumentation#several-enhancements" + "version": "github:mozilla/openwpm-webext-instrumentation#778da7aeff9a0f60a6516e9a94481e1f305f2e9f", + "from": "github:mozilla/openwpm-webext-instrumentation#enhancements-batch-2" }, "opn": { "version": "5.3.0", diff --git a/package.json b/package.json index e081df6..b3c68b4 100644 --- a/package.json +++ b/package.json @@ -7,8 +7,9 @@ "url": "https://github.com/mozilla/shield-studies-addon-template/issues" }, "dependencies": { + "date-fns": "^2.0.0-alpha.25", "idb": "^2.0.4", 
- "openwpm-webext-instrumentation": "github:mozilla/openwpm-webext-instrumentation#several-enhancements", + "openwpm-webext-instrumentation": "github:mozilla/openwpm-webext-instrumentation#enhancements-batch-2", "shield-studies-addon-utils": "github:motin/shield-studies-addon-utils#issues-142-and-128-support-pioneer-pipeline" }, "devDependencies": { diff --git a/src/openwpmSetup.js b/src/openwpmSetup.js index 75c8c7d..416e18a 100644 --- a/src/openwpmSetup.js +++ b/src/openwpmSetup.js @@ -20,7 +20,7 @@ const baseOpenwpmConfig = { http_instrument: true, save_javascript: false, save_all_content: false, - crawl_id: "pioneer", + crawl_id: 0, }; /**