Merge branch 'master' into nhnt11-callstacks

This commit is contained in:
Stefan Zabka 2020-01-10 15:00:50 +01:00
Родитель b0b5bb8858 9686b84029
Коммит 82eced7a80
63 изменённых файлов: 307 добавлений и 267 удалений

Просмотреть файл

@ -34,17 +34,13 @@ RUN ./install-system.sh --no-flash
RUN mv firefox-bin /opt/firefox-bin
ENV FIREFOX_BINARY /opt/firefox-bin/firefox-bin
# Instead of running install-pip-and-packages.sh, the packages are installed
# manually using pip and pip3 so that python2 and python3 are supported in the
# final image.
RUN apt-get -y install python-pip python3-pip
# For some reasons, python3-publicsuffix doesn't work with pip3 at the moment,
# so install it from the ubuntu repository
RUN apt-get -y install python3-publicsuffix
COPY requirements.txt .
RUN pip3 install -U -r requirements.txt
COPY install-pip-and-packages.sh .
RUN ./install-pip-and-packages.sh
COPY --from=extension /usr/src/app/dist/openwpm-*.zip automation/Extension/firefox/openwpm.xpi
@ -59,5 +55,5 @@ COPY . .
# possible to run everything as root as well.
RUN adduser --disabled-password --gecos "OpenWPM" openwpm
# Alternatively, python3 could be used here
# Setting demo.py as the default command
CMD python3 demo.py

Просмотреть файл

@ -7,10 +7,41 @@ of websites. OpenWPM is built on top of Firefox, with automation provided
by Selenium. It includes several hooks for data collection. Check out
the instrumentation section below for more details.
Table of Contents
-----------------
* [Installation](#installation)
* [Quick Start](#quick-start)
* [Instrumentation and Data Access](#instrumentation-and-data-access)
* [Output Formats](#output-format)
* [Local Databases](#local-databases)
* [Parquet on Amazon S3 (Experimental)](#parquet-on-amazon-s3-experimental)
* [Browser and Platform Configuration](#browser-and-platform-configuration)
* [Browser Configuration Options](#platform-configuration-options)
* [Browser Profile Support](#browser-profile-support)
* [Stateful vs Stateless crawls](#stateful-vs-stateless-crawls)
* [Loading and saving a browser profile](#loading-and-saving-a-browser-profile)
* [Development pointers](#development-pointers)
* [Editing instrumentation](#editing-instrumentation)
* [Debugging the platform](#debugging-the-platform)
* [Managing requirements](#managing-requirements)
* [Running tests](#running-tests)
* [Mac OSX (Limited support for developers)](#mac-osx-limited-support-for-developers)
* [Troubleshooting](#troubleshooting)
* [Docker Deployment for OpenWPM](#docker-deployment-for-openwpm)
* [Building the Docker Container](#building-the-docker-container)
* [Running Measurements from inside the Container](#running-measurements-from-inside-the-container)
* [MacOS GUI applications in Docker](#macos-gui-applications-in-docker)
* [Disclaimer](#disclaimer)
* [Citation](#citation)
* [License](#license)
Installation
------------
OpenWPM has been developed and tested on Ubuntu 14.04/16.04. An installation
OpenWPM is a Python 3 application developed and tested for Ubuntu 18.04.
Python 2 is not supported. An installation
script, `install.sh` is included to install both the system and python
dependencies automatically. A few of the python dependencies require specific
versions, so you should install the dependencies in a virtual environment if
@ -419,8 +450,8 @@ OpenWPM should be placed in the former, while those only required to run the
tests (or perform other development tasks) should be placed in the latter.
To update dependencies, run the following two commands **in order**:
* `pip-compile --upgrade requirements.txt`
* `pip-compile --upgrade requirements-dev.txt`
* `pip-compile --upgrade requirements.in`
* `pip-compile --upgrade requirements-dev.in`
It's important that these are run in order, as we layer the dev
dependencies on the output of the pinned production dependencies as per

Просмотреть файл

@ -1,21 +1,19 @@
from __future__ import absolute_import
import errno
import logging
import os
import pickle
import shutil
import signal
import sys
import threading
import time
import traceback
from queue import Empty as EmptyQueue
import psutil
from multiprocess import Queue
from selenium.common.exceptions import WebDriverException
from six import reraise
from six.moves import cPickle as pickle
from six.moves.queue import Empty as EmptyQueue
from tblib import pickling_support
from .Commands import command_executor
@ -126,7 +124,7 @@ class Browser:
launch_status[result[1]] = True
return result[2]
elif result[0] == 'CRITICAL':
reraise(*pickle.loads(result[1]))
raise pickle.loads(result[1])
elif result[0] == 'FAILED':
raise BrowserCrashError(
'Browser spawn returned failure status')

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
from .Errors import CommandExecutionError
@ -23,7 +22,8 @@ class CommandSequence:
called prior to that.
"""
def __init__(self, url, reset=False, blocking=False, retry_number=None):
def __init__(self, url, reset=False,
blocking=False, retry_number=None, site_rank=None):
"""Initialize command sequence.
Parameters
@ -37,6 +37,9 @@ class CommandSequence:
retry_number : int, optional
Integer denoting the number of attempts that have been made to
execute this command. Will be saved in `crawl_history`.
site_rank : int, optional
Integer indicating the ranking of the page to visit, saved
to `site_visits`
"""
self.url = url
self.reset = reset
@ -45,6 +48,7 @@ class CommandSequence:
self.commands_with_timeout = []
self.total_timeout = 0
self.contains_get_or_browse = False
self.site_rank = site_rank
def get(self, sleep=0, timeout=60):
""" goes to a url """

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import gzip
import json
@ -17,7 +16,6 @@ from selenium.common.exceptions import (MoveTargetOutOfBoundsException,
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from six.moves import range
from ..SocketInterface import clientsocket
from .utils.lso import get_flash_cookies
@ -352,9 +350,8 @@ def recursive_dump_page_source(visit_id, driver, manager_params, suffix=''):
page_source = dict()
page_source['doc_url'] = doc_url
source = driver.page_source
import six
if type(source) != six.text_type:
source = six.text_type(source, 'utf-8')
if type(source) != str:
source = str(source, 'utf-8')
page_source['source'] = source
page_source['iframes'] = dict()

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
from . import browser_commands, profile_commands

Просмотреть файл

@ -1,12 +1,10 @@
from __future__ import absolute_import
import logging
import os
import pickle
import shutil
import tarfile
from six.moves import cPickle as pickle
from ..Errors import ProfileLoadError
from .utils.file_utils import rmsubtree
from .utils.firefox_profile import sleep_until_sqlite_checkpoint

Просмотреть файл

@ -4,13 +4,11 @@
#
# Steven Englehardt (github.com/englehardt)
from __future__ import absolute_import, print_function
import re
import bs4
from bs4 import BeautifulSoup as bs
from six.moves import range
def is_clickable(xpath):
@ -150,7 +148,7 @@ def xp1_wildcard(attr, string, normalize=True):
def main():
# Output some sample XPaths
print("--- Sample XPaths ---")
from six.moves.urllib.request import urlopen
from urllib.request import urlopen
import re
from random import choice
rsp = urlopen('http://www.reddit.com/')

Просмотреть файл

@ -1,5 +1,4 @@
# A collection of file utilities
from __future__ import absolute_import
import os
import shutil

Просмотреть файл

@ -1,6 +1,5 @@
# This is code adapted from KU Leuven crawler code written by
# Gunes Acar and Marc Juarez
from __future__ import absolute_import, print_function
import os
import time

Просмотреть файл

@ -1,7 +1,6 @@
# This is code adapted from KU Leuven crawler code written by
# Gunes Acar and Marc Juarez
from __future__ import absolute_import, print_function
import fnmatch
import os
@ -14,11 +13,10 @@ from miniamf import sol
def ensure_unicode(val):
"""Coerce VAL to a Unicode string by any means necessary."""
import six
if isinstance(val, six.text_type):
if isinstance(val, str):
return val
if not isinstance(val, six.binary_type):
return six.text_type(val)
if not isinstance(val, bytes):
return str(val)
try:
return val.decode("utf-8", "backslashescape")
except (UnicodeDecodeError, TypeError):
@ -52,9 +50,8 @@ class FlashCookie(_BaseFlashCookie):
def parse_flash_cookies(lso_file):
import six
lso_dict = sol.load(lso_file)
return [FlashCookie(lso_file, k, v) for k, v in six.iteritems(lso_dict)]
return [FlashCookie(lso_file, k, v) for k, v in iter(lso_dict.items())]
def gen_find_files(filepat, top):

Просмотреть файл

@ -1,11 +1,11 @@
# A set of extensions to the functions normally provided by the selenium
# webdriver. These are primarily for parsing and searching.
from __future__ import absolute_import
import random
import re
import time
from urllib import parse as urlparse
from selenium.common.exceptions import (ElementNotVisibleException,
NoSuchElementException,
@ -14,7 +14,6 @@ from selenium.common.exceptions import (ElementNotVisibleException,
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from six.moves.urllib import parse as urlparse
from ...utilities import domain_utils as du
from . import XPathUtil

Просмотреть файл

@ -1,10 +1,10 @@
import abc
import logging
import queue
import threading
import time
from multiprocess import Queue
from six.moves import queue
from ..SocketInterface import serversocket
from ..utilities.multiprocess_utils import Process
@ -19,10 +19,11 @@ STATUS_UPDATE_INTERVAL = 5 # seconds
class BaseListener(object):
"""Base class for the data aggregator listener process. This class is used
alongside the BaseAggregator class to spawn an aggregator process that
combines data collected in multiple crawl processes and write it to disk as
specified in the child class. The BaseListener class is instantiated in the
remote process, and sets up a listening socket to receive data. Classes
which inherit from this base class define how that data is written to disk.
combines data collected in multiple crawl processes and stores it
persistently as specified in the child class. The BaseListener class
is instantiated in the remote process, and sets up a listening socket to
receive data. Classes which inherit from this base class define
how that data is written to disk.
Parameters
----------

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import, print_function
import base64
import json
@ -9,8 +8,6 @@ from sqlite3 import (IntegrityError, InterfaceError, OperationalError,
ProgrammingError)
import plyvel
import six
from six.moves import range
from .BaseAggregator import RECORD_TYPE_CONTENT, BaseAggregator, BaseListener
@ -102,10 +99,10 @@ class LocalListener(BaseListener):
statement, args = self._generate_insert(
table=record[0], data=record[1])
for i in range(len(args)):
if isinstance(args[i], six.binary_type):
args[i] = six.text_type(args[i], errors='ignore')
if isinstance(args[i], bytes):
args[i] = str(args[i], errors='ignore')
elif callable(args[i]):
args[i] = six.text_type(args[i])
args[i] = str(args[i])
elif type(args[i]) == dict:
print(args[i])
args[i] = json.dumps(args[i])

Просмотреть файл

@ -1,11 +1,12 @@
from __future__ import absolute_import, print_function
import base64
import gzip
import hashlib
import io
import json
import queue
import random
import time
import uuid
from collections import defaultdict
import boto3
@ -13,11 +14,9 @@ import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import s3fs
import six
from botocore.client import Config
from botocore.exceptions import ClientError, EndpointConnectionError
from pyarrow.filesystem import S3FSWrapper # noqa
from six.moves import queue
from .BaseAggregator import RECORD_TYPE_CONTENT, BaseAggregator, BaseListener
from .parquet_schema import PQ_SCHEMAS
@ -174,15 +173,15 @@ class S3Listener(BaseListener):
self.logger.debug(
"File `%s` already exists on s3, skipping..." % filename)
return
if not isinstance(string, six.binary_type):
if not isinstance(string, bytes):
string = string.encode('utf-8')
if compressed:
out_f = six.BytesIO()
out_f = io.BytesIO()
with gzip.GzipFile(fileobj=out_f, mode='w') as writer:
writer.write(string)
out_f.seek(0)
else:
out_f = six.BytesIO(string)
out_f = io.BytesIO(string)
# Upload to S3
try:
@ -206,7 +205,7 @@ class S3Listener(BaseListener):
continue
if table_name == SITE_VISITS_INDEX:
out_str = '\n'.join([json.dumps(x) for x in batches])
if not isinstance(out_str, six.binary_type):
if not isinstance(out_str, bytes):
out_str = out_str.encode('utf-8')
fname = '%s/site_index/instance-%s-%s.json.gz' % (
self.dir, self._instance_id,
@ -287,10 +286,10 @@ class S3Listener(BaseListener):
# Convert data to text type
for k, v in data.items():
if isinstance(v, six.binary_type):
data[k] = six.text_type(v, errors='ignore')
if isinstance(v, bytes):
data[k] = str(v, errors='ignore')
elif callable(v):
data[k] = six.text_type(v)
data[k] = str(v)
# TODO: Can we fix this in the extension?
elif type(v) == dict:
data[k] = json.dumps(v)
@ -343,7 +342,7 @@ class S3Aggregator(BaseAggregator):
self.dir = manager_params['s3_directory']
self.bucket = manager_params['s3_bucket']
self.s3 = boto3.client('s3')
self._instance_id = (uuid.uuid4().int & (1 << 32) - 1) - 2**31
self._instance_id = random.getrandbits(32)
self._create_bucket()
def _create_bucket(self):
@ -368,13 +367,13 @@ class S3Aggregator(BaseAggregator):
# Config parameters for update
out = dict()
out['manager_params'] = self.manager_params
out['openwpm_version'] = six.text_type(openwpm_version)
out['browser_version'] = six.text_type(browser_version)
out['openwpm_version'] = str(openwpm_version)
out['browser_version'] = str(browser_version)
out['browser_params'] = self.browser_params
out_str = json.dumps(out)
if not isinstance(out_str, six.binary_type):
if not isinstance(out_str, bytes):
out_str = out_str.encode('utf-8')
out_f = six.BytesIO(out_str)
out_f = io.BytesIO(out_str)
# Upload to S3 and delete local copy
try:
@ -384,22 +383,22 @@ class S3Aggregator(BaseAggregator):
raise
def get_next_visit_id(self):
"""Generate visit id as randomly generated 53bit UUIDs.
"""Generate visit id as randomly generated positive integer less than 2^53.
Parquet can support integers up to 64 bits, but Javascript can only
represent integers up to 53 bits:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER
Thus, we cap these values at 53 bits.
"""
return (uuid.uuid4().int & (1 << 53) - 1) - 2**52
return random.getrandbits(53)
def get_next_crawl_id(self):
"""Generate crawl id as randomly generated 32bit UUIDs
"""Generate crawl id as randomly generated positive 32bit integer
Note: Parquet's partitioned dataset reader only supports integer
partition columns up to 32 bits.
"""
return (uuid.uuid4().int & (1 << 32) - 1) - 2**31
return random.getrandbits(32)
def launch(self):
"""Launch the aggregator listener process"""

Просмотреть файл

@ -5,17 +5,18 @@ PQ_SCHEMAS = dict()
# site_visits
fields = [
pa.field('visit_id', pa.int64(), nullable=False),
pa.field('crawl_id', pa.int32(), nullable=False),
pa.field('instance_id', pa.int32(), nullable=False),
pa.field('site_url', pa.string(), nullable=False)
pa.field('crawl_id', pa.uint32(), nullable=False),
pa.field('instance_id', pa.uint32(), nullable=False),
pa.field('site_url', pa.string(), nullable=False),
pa.field('site_rank', pa.uint32())
]
PQ_SCHEMAS['site_visits'] = pa.schema(fields)
# flash_cookies
fields = [
pa.field('crawl_id', pa.int32(), nullable=False),
pa.field('crawl_id', pa.uint32(), nullable=False),
pa.field('visit_id', pa.int64(), nullable=False),
pa.field('instance_id', pa.int32(), nullable=False),
pa.field('instance_id', pa.uint32(), nullable=False),
pa.field('domain', pa.string()),
pa.field('filename', pa.string()),
pa.field('local_path', pa.string()),
@ -26,9 +27,9 @@ PQ_SCHEMAS['flash_cookies'] = pa.schema(fields)
# crawl_history
fields = [
pa.field('crawl_id', pa.int32(), nullable=False),
pa.field('crawl_id', pa.uint32(), nullable=False),
pa.field('visit_id', pa.int64(), nullable=False),
pa.field('instance_id', pa.int32(), nullable=False),
pa.field('instance_id', pa.uint32(), nullable=False),
pa.field('command', pa.string()),
pa.field('arguments', pa.string()),
pa.field('retry_number', pa.int8()),
@ -41,9 +42,9 @@ PQ_SCHEMAS['crawl_history'] = pa.schema(fields)
# http_requests
fields = [
pa.field('incognito', pa.int32()),
pa.field('crawl_id', pa.int32()),
pa.field('crawl_id', pa.uint32()),
pa.field('visit_id', pa.int64()),
pa.field('instance_id', pa.int32(), nullable=False),
pa.field('instance_id', pa.uint32(), nullable=False),
pa.field('extension_session_uuid', pa.string()),
pa.field('event_ordinal', pa.int64()),
pa.field('window_id', pa.int64()),
@ -76,9 +77,9 @@ PQ_SCHEMAS['http_requests'] = pa.schema(fields)
# http_responses
fields = [
pa.field('incognito', pa.int32()),
pa.field('crawl_id', pa.int32()),
pa.field('crawl_id', pa.uint32()),
pa.field('visit_id', pa.int64()),
pa.field('instance_id', pa.int32(), nullable=False),
pa.field('instance_id', pa.uint32(), nullable=False),
pa.field('extension_session_uuid', pa.string()),
pa.field('event_ordinal', pa.int64()),
pa.field('window_id', pa.int64()),
@ -100,9 +101,9 @@ PQ_SCHEMAS['http_responses'] = pa.schema(fields)
# http_redirects
fields = [
pa.field('incognito', pa.int32()),
pa.field('crawl_id', pa.int32()),
pa.field('crawl_id', pa.uint32()),
pa.field('visit_id', pa.int64()),
pa.field('instance_id', pa.int32(), nullable=False),
pa.field('instance_id', pa.uint32(), nullable=False),
pa.field('old_request_url', pa.string()),
pa.field('old_request_id', pa.string()),
pa.field('new_request_url', pa.string()),
@ -121,9 +122,9 @@ PQ_SCHEMAS['http_redirects'] = pa.schema(fields)
# javascript
fields = [
pa.field('incognito', pa.int32()),
pa.field('crawl_id', pa.int32()),
pa.field('crawl_id', pa.uint32()),
pa.field('visit_id', pa.int64()),
pa.field('instance_id', pa.int32(), nullable=False),
pa.field('instance_id', pa.uint32(), nullable=False),
pa.field('extension_session_uuid', pa.string()),
pa.field('event_ordinal', pa.int64()),
pa.field('page_scoped_event_ordinal', pa.int64()),
@ -148,9 +149,9 @@ PQ_SCHEMAS['javascript'] = pa.schema(fields)
# javascript_cookies
fields = [
pa.field('crawl_id', pa.int32()),
pa.field('crawl_id', pa.uint32()),
pa.field('visit_id', pa.int64()),
pa.field('instance_id', pa.int32(), nullable=False),
pa.field('instance_id', pa.uint32(), nullable=False),
pa.field('extension_session_uuid', pa.string()),
pa.field('event_ordinal', pa.int64()),
pa.field('record_type', pa.string()),
@ -174,9 +175,9 @@ PQ_SCHEMAS['javascript_cookies'] = pa.schema(fields)
# navigations
fields = [
pa.field('incognito', pa.int32()),
pa.field('crawl_id', pa.int32()),
pa.field('crawl_id', pa.uint32()),
pa.field('visit_id', pa.int64()),
pa.field('instance_id', pa.int32(), nullable=False),
pa.field('instance_id', pa.uint32(), nullable=False),
pa.field('extension_session_uuid', pa.string()),
pa.field('process_id', pa.int64()),
pa.field('window_id', pa.int64()),

Просмотреть файл

@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS site_visits (
visit_id INTEGER PRIMARY KEY,
crawl_id INTEGER NOT NULL,
site_url VARCHAR(500) NOT NULL,
site_rank INTEGER,
FOREIGN KEY(crawl_id) REFERENCES crawl(id));
/*

Просмотреть файл

@ -1,6 +1,5 @@
""" Set prefs and load extensions in Firefox """
from __future__ import absolute_import, print_function
import os
import shutil

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
from ..Errors import BrowserConfigError
from . import deploy_firefox

Просмотреть файл

@ -1,11 +1,9 @@
from __future__ import absolute_import
import json
import logging
import os.path
import random
import six
from selenium import webdriver
from ..Commands.profile_commands import load_profile
@ -120,7 +118,7 @@ def deploy_firefox(status_queue, browser_params, manager_params,
fo.add_argument('--height={}'.format(DEFAULT_SCREEN_RES[1]))
if browser_params['save_content']:
if isinstance(browser_params['save_content'], six.string_types):
if isinstance(browser_params['save_content'], str):
configured_types = set(browser_params['save_content'].split(','))
if not configured_types.issubset(ALL_RESOURCE_TYPES):
diff = configured_types.difference(ALL_RESOURCE_TYPES)

Просмотреть файл

@ -2,7 +2,6 @@
Workarounds for Selenium headaches.
"""
from __future__ import absolute_import
import errno
import json
@ -20,7 +19,6 @@ from selenium.webdriver.firefox.firefox_profile import AddonFormatError
from selenium.webdriver.firefox.firefox_profile import \
FirefoxProfile as BaseFirefoxProfile
from selenium.webdriver.firefox.options import Options
from six.moves import range
__all__ = ['FirefoxBinary', 'FirefoxProfile', 'FirefoxLogInterceptor',
'Options']

Просмотреть файл

@ -0,0 +1 @@
registry=https://registry.npmjs.org/

Просмотреть файл

@ -8,19 +8,19 @@
},
"devDependencies": {
"eslint": "^5.16.0",
"eslint-plugin-import": "^2.17.3",
"eslint-plugin-import": "^2.19.1",
"eslint-plugin-json": "^1.4.0",
"eslint-plugin-mozilla": "^0.14.0",
"eslint-plugin-no-unsanitized": "^3.0.2",
"npm-run-all": "^4.1.1",
"ts-loader": "^5.4.5",
"tslint": "^5.17.0",
"tslint": "^5.20.1",
"tslint-eslint-rules": "^5.4.0",
"typescript": "^3.5.1",
"typescript": "^3.7.3",
"typescript-eslint-parser": "^20.0.0",
"web-ext": "^3.1.1",
"webpack": "^4.33.0",
"webpack-cli": "^3.3.4"
"web-ext": "^3.2.1",
"webpack": "^4.41.3",
"webpack-cli": "^3.3.10"
},
"engines": {
"node": ">=8.11.1"

Просмотреть файл

@ -0,0 +1 @@
registry=https://registry.npmjs.org/

Просмотреть файл

@ -62,22 +62,22 @@
"devDependencies": {
"@types/firefox-webext-browser": "^63.0.0",
"ava": "1.0.0-beta.7",
"codecov": "^3.1.0",
"codecov": "^3.6.1",
"commitizen": "^4.0.3",
"cz-conventional-changelog": "^2.1.0",
"gh-pages": "^2.0.0",
"gh-pages": "^2.1.1",
"npm-run-all": "^4.1.5",
"nyc": "^14.1.1",
"opn-cli": "^3.1.0",
"prettier": "^1.14.3",
"prettier": "^1.19.1",
"publish-please": "^5.5.1",
"standard-version": "github:conventional-changelog/standard-version#master",
"trash-cli": "^1.4.0",
"tslint": "^5.11.0",
"tslint": "^5.20.1",
"tslint-config-prettier": "^1.15.0",
"tslint-immutable": "^4.7.0",
"typedoc": "^0.15.0",
"typescript": "^3.0.3"
"typedoc": "^0.15.5",
"typescript": "^3.7.3"
},
"ava": {
"failFast": true,

Просмотреть файл

@ -589,9 +589,13 @@ export function jsInstruments(event_id, sendMessagesToLogger) {
// Store original descriptor in closure
const propDesc = Object.getPropertyDescriptor(object, propertyName);
// Property descriptor must exist unless we are instrumenting a
// non-existing property
if (
!propDesc &&
logSettings.nonExistingPropertiesToInstrument.indexOf(propertyName) == -1
(!logSettings.nonExistingPropertiesToInstrument ||
logSettings.nonExistingPropertiesToInstrument.indexOf(propertyName) ==
-1)
) {
console.error(
"Property descriptor not found for",

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import, print_function
import copy
import json
@ -10,13 +9,12 @@ import struct
import sys
import threading
import time
from queue import Empty as EmptyQueue
import dill
import sentry_sdk
import six
from multiprocess import JoinableQueue
from sentry_sdk.integrations.logging import BreadcrumbHandler, EventHandler
from six.moves.queue import Empty as EmptyQueue
from tblib import pickling_support
from .Commands.utils.webdriver_utils import parse_neterror
@ -111,6 +109,7 @@ class MPLogger(object):
# Configure log handlers
self._status_queue = JoinableQueue()
self._log_file = os.path.expanduser(log_file)
self._initialize_loggers()
# Configure sentry (if available)
@ -266,9 +265,9 @@ class MPLogger(object):
and those sent to Sentry.
"""
if obj['exc_info']:
obj['exc_info'] = dill.loads(six.ensure_str(obj['exc_info']))
obj['exc_info'] = dill.loads(obj['exc_info'])
if obj['args']:
obj['args'] = dill.loads(six.ensure_str(obj['args']))
obj['args'] = dill.loads(obj['args'])
record = logging.makeLogRecord(obj)
self._file_handler.emit(record)
if self._sentry_dsn:

Просмотреть файл

@ -1,19 +1,11 @@
from __future__ import absolute_import, print_function
import json
import socket
import struct
import threading
import traceback
from queue import Queue
import dill
import six
from six.moves import input
from six.moves.queue import Queue
if six.PY2:
class ConnectionAbortedError(Exception):
pass
# TODO - Implement a cleaner shutdown for server socket
# see: https://stackoverflow.com/a/1148237
@ -142,10 +134,9 @@ class clientsocket:
using dill if not string, and prepends msg len (4-bytes) and
serialization type (1-byte).
"""
import six
if isinstance(msg, six.binary_type):
if isinstance(msg, bytes):
serialization = b'n'
elif isinstance(msg, six.text_type):
elif isinstance(msg, str):
serialization = b'u'
msg = msg.encode('utf-8')
elif self.serialization == 'dill':

Просмотреть файл

@ -1,19 +1,16 @@
from __future__ import absolute_import, division
import copy
import json
import logging
import os
import pickle
import threading
import time
import traceback
from queue import Empty as EmptyQueue
import psutil
import tblib
from six import reraise
from six.moves import cPickle as pickle
from six.moves import range
from six.moves.queue import Empty as EmptyQueue
from . import CommandSequence, MPLogger
from .BrowserManager import Browser
@ -301,7 +298,7 @@ class TaskManager:
"failure limit.", self.failure_status['CommandSequence']
)
if self.failure_status['ErrorType'] == 'CriticalChildException':
reraise(*pickle.loads(self.failure_status['Exception']))
raise pickle.loads(self.failure_status['Exception'])
# CRAWLER COMMAND CODE
@ -401,7 +398,8 @@ class TaskManager:
self.sock.send(("site_visits", {
"visit_id": browser.curr_visit_id,
"crawl_id": browser.crawl_id,
"site_url": command_sequence.url
"site_url": command_sequence.url,
"site_rank": command_sequence.site_rank
}))
# Start command execution thread
@ -469,6 +467,7 @@ class TaskManager:
if status == "OK":
command_status = 'ok'
elif status[0] == "CRITICAL":
command_status = 'critical'
self.logger.critical(
"BROWSER %i: Received critical error from browser "
"process while executing command %s. Setting failure "

Просмотреть файл

@ -210,15 +210,12 @@ Finis.
# Import our required modules
#
from __future__ import absolute_import
import re
#
import string
import warnings
from six.moves import map, range
from six.moves.cPickle import dumps, loads
from pickle import dumps, loads
__all__ = ["CookieError", "BaseCookie", "SimpleCookie", "SerialCookie",
"SmartCookie", "Cookie"]

Просмотреть файл

@ -1,12 +1,11 @@
from __future__ import absolute_import, print_function
import json
import os
import sqlite3
import time
from urllib.parse import urlparse
from netlib.odict import ODictCaseless
from six.moves.urllib.parse import urlparse
# This should be the modified Cookie.py included
# the standard lib Cookie.py has many bugs
@ -24,14 +23,13 @@ def encode_to_unicode(string):
Encode from UTF-8/ISO-8859-1 to Unicode.
Ignore errors if both of these don't work
"""
import six
try:
encoded = six.text_type(string, 'UTF-8')
encoded = str(string, 'UTF-8')
except UnicodeDecodeError:
try:
encoded = six.text_type(string, 'ISO-8859-1')
encoded = str(string, 'ISO-8859-1')
except UnicodeDecodeError:
encoded = six.text_type(string, 'UTF-8', errors='ignore')
encoded = str(string, 'UTF-8', errors='ignore')
return encoded
@ -122,9 +120,8 @@ def parse_cookies(cookie_string, verbose, url=None, response_cookie=False):
"""
queries = list()
attrs = ()
import six
try:
if type(cookie_string) == six.text_type:
if type(cookie_string) == str:
cookie_string = cookie_string.encode('utf-8')
cookie = Cookie.BaseCookie(cookie_string)
for key in cookie.keys():

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import os
import sqlite3

Просмотреть файл

@ -1,14 +1,12 @@
from __future__ import absolute_import, print_function
import codecs
import os
import tempfile
from functools import wraps
from ipaddress import ip_address
from urllib.parse import urlparse
from publicsuffix import PublicSuffixList, fetch
from six.moves import range
from six.moves.urllib.parse import urlparse
# We cache the Public Suffix List in temp directory
PSL_CACHE_LOC = os.path.join(tempfile.gettempdir(), 'public_suffix_list.dat')
@ -45,9 +43,8 @@ def is_ip_address(hostname):
"""
Check if the given string is a valid IP address
"""
import six
try:
ip_address(six.text_type(hostname))
ip_address(str(hostname))
return True
except ValueError:
return False

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import, print_function
import json
import os
@ -89,13 +88,11 @@ def get_version():
openwpm = f.readline().strip()
firefox_binary_path = get_firefox_binary_path()
import six
try:
firefox = subprocess.check_output([firefox_binary_path, "--version"])
except subprocess.CalledProcessError as e:
six.raise_from(
RuntimeError("Firefox not found. Did you run `./install.sh`?"),
e)
raise RuntimeError("Firefox not found. "
" Did you run `./install.sh`?") from e
ff = firefox.split()[-1]
return openwpm, ff

Просмотреть файл

@ -1,11 +1,10 @@
from __future__ import absolute_import
import json
import os
import time
import boto3
import sentry_sdk
from six.moves import range
from automation import CommandSequence, MPLogger, TaskManager
from automation.utilities import rediswq
@ -22,6 +21,7 @@ NAVIGATION_INSTRUMENT = os.getenv('NAVIGATION_INSTRUMENT', '1') == '1'
JS_INSTRUMENT = os.getenv('JS_INSTRUMENT', '1') == '1'
JS_INSTRUMENT_MODULES = os.getenv('JS_INSTRUMENT_MODULES', None)
SAVE_CONTENT = os.getenv('SAVE_CONTENT', '')
PREFS = os.getenv('PREFS', None)
DWELL_TIME = int(os.getenv('DWELL_TIME', '10'))
TIMEOUT = int(os.getenv('TIMEOUT', '60'))
SENTRY_DSN = os.getenv('SENTRY_DSN', None)
@ -49,6 +49,8 @@ for i in range(NUM_BROWSERS):
browser_params[i]['save_content'] = False
else:
browser_params[i]['save_content'] = SAVE_CONTENT
if PREFS:
browser_params[i]['prefs'] = json.loads(PREFS)
browser_params[i]['headless'] = True
# Manager configuration
@ -89,6 +91,7 @@ if SENTRY_DSN:
scope.set_tag('CRAWL_REFERENCE', '%s/%s' %
(S3_BUCKET, CRAWL_DIRECTORY))
# context adds addition information that may be of interest
scope.set_context("PREFS", PREFS)
scope.set_context("crawl_config", {
'REDIS_QUEUE_NAME': REDIS_QUEUE_NAME,
})

Просмотреть файл

@ -1,6 +1,3 @@
from __future__ import absolute_import
from six.moves import range
from automation import CommandSequence, TaskManager

Просмотреть файл

@ -2,4 +2,4 @@
set -e
# Dependencies for OpenWPM development -- NOT needed to run the platform.
pip install --user -U -r requirements-dev.txt
pip3 install --user -U -r requirements-dev.txt

Просмотреть файл

@ -31,8 +31,8 @@ brew install node || true
# Use the Unbranded build that corresponds to a specific Firefox version (source: https://wiki.mozilla.org/Add-ons/Extension_Signing#Unbranded_Builds)
brew install wget || true
UNBRANDED_FF70_RELEASE_MAC_BUILD="https://queue.taskcluster.net/v1/task/Kclp8treRdORNcAmrNLgJg/runs/0/artifacts/public/build/target.dmg"
wget "$UNBRANDED_FF70_RELEASE_MAC_BUILD"
UNBRANDED_FF71_RELEASE_MAC_BUILD="https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/LQgnuH1-R8a31vCSFufr2g/runs/0/artifacts/public/build/target.dmg"
wget "$UNBRANDED_FF71_RELEASE_MAC_BUILD"
# Install Firefox Nightly
rm -rf Nightly.app || true
hdiutil attach -nobrowse -mountpoint /Volumes/firefox-tmp target.dmg
@ -42,7 +42,7 @@ rm target.dmg
# Selenium 3.3+ requires a 'geckodriver' helper executable, which is not yet
# packaged.
GECKODRIVER_VERSION=0.24.0
GECKODRIVER_VERSION=0.26.0
GECKODRIVER_ARCH=macos
wget https://github.com/mozilla/geckodriver/releases/download/v${GECKODRIVER_VERSION}/geckodriver-v${GECKODRIVER_VERSION}-${GECKODRIVER_ARCH}.tar.gz

Просмотреть файл

@ -39,7 +39,7 @@ if [ "$flash" = true ]; then
fi
sudo apt-get update
sudo apt-get install -y firefox htop git python-dev libxml2-dev libxslt-dev libffi-dev libssl-dev build-essential libboost-python-dev libleveldb-dev libjpeg-dev curl wget git bash vim
sudo apt-get install -y firefox htop git libxml2-dev libxslt-dev libffi-dev libssl-dev build-essential libboost-python-dev libleveldb-dev libjpeg-dev curl wget git bash vim
# For some versions of ubuntu, the package libleveldb1v5 isn't available. Use libleveldb1 instead.
sudo apt-get install -y libleveldb1v5 || sudo apt-get install -y libleveldb1
@ -49,8 +49,8 @@ if [ "$flash" = true ]; then
fi
# Use the Unbranded build that corresponds to a specific Firefox version (source: https://wiki.mozilla.org/Add-ons/Extension_Signing#Unbranded_Builds)
UNBRANDED_FF70_RELEASE_LINUX_BUILD="https://queue.taskcluster.net/v1/task/S9x3bLGUQTOAUfopXQGLzg/runs/0/artifacts/public/build/target.tar.bz2"
wget "$UNBRANDED_FF70_RELEASE_LINUX_BUILD"
UNBRANDED_FF71_RELEASE_LINUX_BUILD="https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/QKKKcc7VQhq8ngUotlj6hA/runs/0/artifacts/public/build/target.tar.bz2"
wget "$UNBRANDED_FF71_RELEASE_LINUX_BUILD"
tar jxf target.tar.bz2
rm -rf firefox-bin
mv firefox firefox-bin
@ -58,7 +58,7 @@ rm target.tar.bz2
# Selenium 3.3+ requires a 'geckodriver' helper executable, which is not yet
# packaged.
GECKODRIVER_VERSION=0.24.0
GECKODRIVER_VERSION=0.26.0
case $(uname -m) in
(x86_64)
GECKODRIVER_ARCH=linux64

Просмотреть файл

@ -7,41 +7,41 @@
airspeed==0.5.10 # via localstack
amazon_kclpy-ext==1.5.1 # via localstack
argparse==1.4.0 # via amazon-kclpy-ext
asn1crypto==1.0.1 # via cryptography
attrs==19.2.0 # via jsonschema
attrs==19.3.0 # via jsonschema
autopep8==1.4.4
aws-sam-translator==1.15.0 # via cfn-lint
aws-sam-translator==1.15.1 # via cfn-lint
aws-xray-sdk==2.4.2 # via moto-ext
awscli==1.16.254 # via localstack
boto3==1.9.244 # via aws-sam-translator, localstack, localstack-client, moto-ext
awscli==1.16.289 # via localstack
boto3==1.10.25 # via aws-sam-translator, localstack, localstack-client, moto-ext
boto==2.49.0 # via amazon-kclpy-ext, localstack, moto-ext
botocore==1.12.244 # via aws-xray-sdk, awscli, boto3, localstack, moto-ext, s3transfer
botocore==1.13.25 # via aws-xray-sdk, awscli, boto3, localstack, moto-ext
cachetools==3.1.1 # via airspeed
certifi==2019.9.11 # via requests
cffi==1.12.3 # via cryptography
cfn-lint==0.24.4 # via moto-ext
cffi==1.13.2 # via cryptography
cfn-lint==0.25.2 # via moto-ext
chardet==3.0.4 # via requests
click==7.0 # via flask
colorama==0.4.1 # via awscli
coverage==4.5.4 # via localstack, python-coveralls
cryptography==2.7 # via moto-ext, pyopenssl, sshpubkeys
decorator==4.4.0 # via jsonpath-rw
cryptography==2.8 # via moto-ext, pyopenssl, sshpubkeys
decorator==4.4.1 # via jsonpath-rw
dnslib==0.9.10 # via localstack-ext
dnspython==1.16.0 # via localstack, localstack-ext
docker==4.1.0 # via moto-ext
docopt==0.6.2 # via localstack
docutils==0.15.2 # via awscli, botocore
ecdsa==0.13.3 # via python-jose, sshpubkeys
ecdsa==0.14.1 # via python-jose, sshpubkeys
elasticsearch==6.4.0 # via localstack
entrypoints==0.3 # via flake8
flake8-quotes==2.1.0 # via localstack
flake8==3.7.8 # via flake8-quotes, localstack
flake8-quotes==2.1.1 # via localstack
flake8==3.7.9 # via flake8-quotes, localstack
flask-cors==3.0.3 # via localstack
flask==1.0.2 # via flask-cors, flask-swagger, localstack
flask_swagger==0.2.12 # via localstack
forbiddenfruit==0.1.3 # via localstack
future==0.17.1 # via aws-xray-sdk, python-jose
future==0.18.2 # via aws-xray-sdk, python-jose
idna==2.8 # via moto-ext, requests
importlib-metadata==0.23 # via jsonschema
itsdangerous==1.1.0 # via flask
jinja2==2.10.3 # via flask, moto-ext
jmespath==0.9.4 # via boto3, botocore
@ -50,44 +50,44 @@ jsonpatch==1.24 # via cfn-lint
jsonpath-rw==1.4.0 # via localstack
jsonpickle==1.2 # via aws-xray-sdk
jsonpointer==2.0 # via jsonpatch
jsonschema==3.0.2 # via aws-sam-translator, cfn-lint
localstack-client==0.14 # via localstack
localstack-ext==0.10.41 # via localstack
localstack[full]==0.10.4.2
jsonschema==3.2.0 # via aws-sam-translator, cfn-lint
localstack-client==0.15 # via localstack
localstack-ext==0.10.66 # via localstack
localstack[full]==0.10.5
markupsafe==1.1.1 # via jinja2
mccabe==0.6.1 # via flake8
mock==3.0.5 # via amazon-kclpy-ext, moto-ext
moto-ext==1.3.14.1 # via localstack
moto-ext==1.3.14.2 # via localstack
nose-timer==0.7.5 # via localstack
nose==1.3.7 # via localstack, nose-timer
ply==3.11 # via jsonpath-rw
psutil==5.4.8 # via localstack
pyaes==1.6.0 # via localstack-ext
pyasn1==0.4.7 # via rsa
pyasn1==0.4.8 # via rsa
pycodestyle==2.5.0 # via autopep8, flake8
pycparser==2.19 # via cffi
pyflakes==2.1.1 # via flake8
pympler==0.7 # via localstack
pympler==0.8 # via localstack
pyopenssl==17.5.0 # via localstack
pyrsistent==0.15.4 # via jsonschema
pyrsistent==0.15.5 # via jsonschema
python-coveralls==2.9.3 # via localstack
python-dateutil==2.8.0 # via botocore, moto-ext
python-jose==3.0.1 # via moto-ext
pytz==2019.3 # via moto-ext
pyyaml==5.1 # via awscli, cfn-lint, flask-swagger, localstack, moto-ext, python-coveralls
requests-aws4auth==0.9 # via localstack
requests==2.22.0 # via docker, localstack, moto-ext, python-coveralls, requests-aws4auth, responses
requests==2.22.0 # via docker, localstack, localstack-ext, moto-ext, python-coveralls, requests-aws4auth, responses
responses==0.10.6 # via moto-ext
rsa==3.4.2 # via awscli, python-jose
s3transfer==0.2.1 # via awscli, boto3
six==1.12.0 # via airspeed, aws-sam-translator, cfn-lint, cryptography, docker, flask-cors, jsonpath-rw, jsonschema, localstack, mock, moto-ext, pyopenssl, pyrsistent, python-coveralls, python-dateutil, python-jose, responses, websocket-client
six==1.13.0 # via airspeed, aws-sam-translator, cfn-lint, cryptography, docker, ecdsa, flask-cors, jsonpath-rw, jsonschema, localstack, mock, moto-ext, pyopenssl, pyrsistent, python-coveralls, python-jose, responses, websocket-client
sshpubkeys==3.1.0 # via moto-ext
subprocess32==3.5.4 # via localstack
urllib3==1.25.6 # via botocore, elasticsearch, requests
urllib3==1.25.7 # via botocore, elasticsearch, requests
websocket-client==0.56.0 # via docker
werkzeug==0.16.0 # via flask, moto-ext
wrapt==1.11.2 # via aws-xray-sdk
xmltodict==0.12.0 # via localstack, moto-ext
# The following packages are considered to be unsafe in a requirements file:
# setuptools==41.4.0 # via cfn-lint, jsonschema
# setuptools==41.6.0 # via jsonschema

Просмотреть файл

@ -23,7 +23,6 @@ s3fs
selenium
sentry-sdk
setuptools
six
tabulate
tblib
tld

Просмотреть файл

@ -4,71 +4,61 @@
#
# pip-compile requirements.in
#
atomicwrites==1.3.0 # via pytest
attrs==19.2.0 # via pytest
attrs==19.3.0 # via pytest
backcall==0.1.0 # via ipython
beautifulsoup4==4.8.1
boto3==1.9.244
botocore==1.12.244 # via boto3, s3fs, s3transfer
boto3==1.10.25
botocore==1.13.25 # via boto3, s3fs
certifi==2019.9.11 # via sentry-sdk
cython==0.29.13
decorator==4.4.0 # via ipython, traitlets
defusedxml==0.6.0 # via mini-amf
cython==0.29.14
decorator==4.4.1 # via ipython
dill==0.3.1.1
docutils==0.15.2 # via botocore
entrypoints==0.3 # via flake8
flake8-isort==2.7.0
flake8==3.7.8
fsspec==0.5.2 # via s3fs
flake8==3.7.9
fsspec==0.6.0 # via s3fs
importlib-metadata==0.23 # via pluggy, pytest
ipython-genutils==0.2.0 # via traitlets
ipython==7.8.0
isort==4.3.21 # via flake8-isort
ipython==7.9.0
jedi==0.15.1 # via ipython
jmespath==0.9.4 # via boto3, botocore
mccabe==0.6.1 # via flake8
mini-amf==0.9.1
mmh3==2.5.1
more-itertools==7.2.0 # via pytest, zipp
more-itertools==7.2.0 # via pytest
multiprocess==0.70.9
numpy==1.17.2
numpy==1.17.4
packaging==19.2 # via pytest
pandas==0.25.1
parso==0.5.1 # via jedi
pandas==0.25.3
pexpect==4.7.0 # via ipython
pickleshare==0.7.5 # via ipython
pillow==6.2.0
pluggy==0.13.0 # via pytest
pillow==6.2.1
pluggy==0.13.1 # via pytest
plyvel==1.1.0
prompt-toolkit==2.0.10 # via ipython
psutil==5.4.8
ptyprocess==0.6.0 # via pexpect
publicsuffix==1.1.0
py==1.8.0 # via pytest
pyarrow==0.15.0
pyasn1==0.4.7
pyarrow==0.15.1
pyasn1==0.4.8
pycodestyle==2.5.0 # via flake8
pyflakes==2.1.1 # via flake8
pygments==2.4.2 # via ipython
pyparsing==2.4.2 # via packaging
pytest==5.2.1
pytest==5.3.0
python-dateutil==2.8.0
pytz==2019.3 # via pandas
redis==3.3.8
s3fs==0.3.5
redis==3.3.11
s3fs==0.4.0
s3transfer==0.2.1 # via boto3
selenium==3.141.0
sentry-sdk==0.12.3
six==1.12.0
soupsieve==1.9.4 # via beautifulsoup4
tabulate==0.8.5
tblib==1.4.0
testfixtures==6.10.0 # via flake8-isort
tld==0.9.6
sentry-sdk==0.13.2
six==1.13.0 # via pyarrow, tld
tabulate==0.8.6
tblib==1.5.0
tld==0.9.8
traitlets==4.3.3 # via ipython
urllib3==1.25.6 # via botocore, selenium, sentry-sdk
wcwidth==0.1.7 # via prompt-toolkit, pytest
zipp==0.6.0 # via importlib-metadata
urllib3==1.25.7 # via botocore, sentry-sdk
wcwidth==0.1.7 # via pytest
# The following packages are considered to be unsafe in a requirements file:
# setuptools==41.4.0
# setuptools==41.6.0

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import, print_function
import os
import subprocess

Просмотреть файл

@ -1,5 +1,4 @@
""" Contains lists of expected data and or rows for tests """
from __future__ import absolute_import
from .utilities import BASE_TEST_URL, BASE_TEST_URL_DOMAIN

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import, print_function
import atexit
import subprocess

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import os
from os.path import isfile, join

Просмотреть файл

@ -1,10 +1,9 @@
from __future__ import absolute_import
import os
import tarfile
from urllib.parse import urlparse
import pytest
from six.moves.urllib.parse import urlparse
from ..automation import TaskManager
from ..automation.utilities import db_utils, domain_utils

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
from ..automation import CommandSequence, TaskManager
from ..automation.utilities import db_utils

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import, print_function
from os.path import dirname, isfile, realpath

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import os
from datetime import datetime

Просмотреть файл

@ -1,17 +1,14 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import base64
import json
import os
from hashlib import sha256
from time import sleep
from urllib.parse import urlparse
import pytest
import six
from six.moves import range
from six.moves.urllib.parse import urlparse
from ..automation import CommandSequence, TaskManager
from ..automation.utilities import db_utils
@ -830,7 +827,7 @@ class TestPOSTInstrument(OpenWPMTest):
encoding_type = "text/plain"
db = self.visit('/post_request.html?encoding_type=' + encoding_type)
post_body = self.get_post_request_body_from_db(db, True)
if not isinstance(self.post_data_multiline_raw, six.text_type):
if not isinstance(self.post_data_multiline_raw, str):
expected = self.post_data_multiline_raw.decode('utf-8')
else:
expected = self.post_data_multiline_raw
@ -880,7 +877,7 @@ class TestPOSTInstrument(OpenWPMTest):
db = self.visit("/post_request_ajax.html?format=" + post_format)
post_body = self.get_post_request_body_from_db(db, True)
# Binary strings get put into the database as-if they were latin-1.
assert six.binary_type(bytearray(range(100))) == post_body
assert bytes(bytearray(range(100))) == post_body
@pytest.mark.skip(reason="Firefox is currently not able to return the "
"file content for an upload, only the filename")

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import re

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import re

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import re

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import re

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import logging
import os
@ -67,6 +66,16 @@ def child_proc_with_exception(index):
)
def child_proc_logging_exception():
    """Raise a throwaway exception and log it, with traceback, to the
    'openwpm' logger.

    Used as a child-process target to verify that exception tracebacks
    logged in a subprocess are collected by MPLogger.
    """
    log = logging.getLogger('openwpm')
    try:
        raise Exception("This is my generic Test Exception")
    except Exception:
        # exc_info=True attaches the current traceback to the record
        log.error("I'm logging an exception", exc_info=True)
class TestMPLogger(OpenWPMTest):
def get_logfile_path(self, directory):
@ -163,3 +172,14 @@ class TestMPLogger(OpenWPMTest):
assert(log_content.count(CHILD_INFO_STR_1 % child) == 1)
assert(log_content.count(CHILD_INFO_STR_2 % child) == 1)
assert(log_content.count(CHILD_EXCEPTION_STR % child) == 1)
def test_child_process_logging(self, tmpdir):
    """Verify that a log record emitted inside a child process is routed
    back to the parent and written to the MPLogger log file.

    Parameters
    ----------
    tmpdir : py.path.local
        pytest-provided temporary directory for the log file.
    """
    log_file = self.get_logfile_path(str(tmpdir))
    openwpm_logger = MPLogger.MPLogger(log_file)
    # BUG FIX: pass the callable itself, not its return value. The
    # original `target=child_proc_logging_exception()` executed the
    # function in the *parent* process and handed `target=None` to
    # Process, so the child never logged anything and the assertion
    # only passed because the parent had already emitted the record.
    child_process = Process(target=child_proc_logging_exception)
    child_process.daemon = True
    child_process.start()
    openwpm_logger.close()
    child_process.join()
    log_content = self.get_logfile_contents(log_file)
    assert ("I'm logging an exception" in log_content)

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -0,0 +1,42 @@
<!-- sample code derived from https://www.browserleaks.com/canvas#how-does-it-work -->
<!-- Test fixture: deliberately passes very large (~200 KB) strings through
     canvas APIs so the JS instrumentation must handle oversized call
     arguments. Do not "fix" the huge strings or the missing head/body
     tags -- the page is intentionally minimal. -->
<html>
<title>Large Payloads Test Page</title>
<h1>Large Payloads Test Page</h1>
<h3>Yields large payloads within the JS instrumentation</h3>
<canvas id="myCanvas"></canvas>
<pre id="canvasDataURL"></pre>
<img id="canvasDataURLImage" src="about:blank"/>
<script>
// Timestamps bracket the canvas work so test logs show how long the
// instrumented calls took.
console.log("Before", new Date().toISOString());
// ~200 KB payload: an 11-char chunk repeated 20,000 times.
const str200kb = "01234567890".repeat(100 * 200);
// Text with lowercase/uppercase/punctuation symbols and a large string
var txt = "BrowserLeaks,com <canvas> 1.0" + str200kb;
var canvas = document.getElementById('myCanvas');
var ctx = canvas.getContext("2d");
ctx.textBaseline = "top";
// The most common type
ctx.font = "14px 'Arial'";
ctx.textBaseline = "alphabetic";
ctx.fillStyle = "#f60";
ctx.fillRect(125,1,62,20);
// Some tricks for color mixing to increase the difference in rendering
ctx.fillStyle = "#069";
ctx.fillText(txt, 2, 15);
ctx.fillStyle = "rgba(102, 204, 0, 0.7)";
ctx.fillText(txt, 4, 17);
// Intentionally invalid: assigns the 200 KB string as a fillStyle so the
// instrumentation records a huge (ignored) property value.
ctx.fillStyle = str200kb;
ctx.fillText(txt, 6, 18);
// toDataURL() itself returns a large base64 payload; it is both rendered
// as text and reused as an image source.
var pre = document.getElementById("canvasDataURL");
pre.innerHTML = canvas.toDataURL();
var img = document.getElementById("canvasDataURLImage");
img.src = canvas.toDataURL();
console.log("After", new Date().toISOString());
</script>
</html>

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
from os.path import isfile, join

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import json
import time
@ -6,7 +5,6 @@ from collections import defaultdict
import boto3
import pytest
import six
from localstack.services import infra
from ..automation import TaskManager
@ -72,6 +70,8 @@ class TestS3Aggregator(OpenWPMTest):
table = dataset.load_table(table_name)
visit_ids[table_name] = table.visit_id.unique()
assert len(visit_ids[table_name]) == NUM_VISITS * NUM_BROWSERS
for vid in visit_ids[table_name]:
assert(vid >= 0) and (vid < (1 << 53))
for table_name, ids in visit_ids.items():
assert set(ids) == set(visit_ids['site_visits'])
@ -83,7 +83,7 @@ class TestS3Aggregator(OpenWPMTest):
# of configuration files
config_file = dataset.list_files('config', prepend_root=True)
assert len(config_file) == 1 # only one instance started in test
config = json.loads(six.text_type(
config = json.loads(str(
dataset.get_file(config_file[0]), 'utf-8'))
assert len(config['browser_params']) == NUM_BROWSERS

Просмотреть файл

@ -1,13 +1,12 @@
from __future__ import absolute_import
import glob
import gzip
import json
import os
import re
from urllib.parse import urlparse
from PIL import Image
from six.moves.urllib.parse import urlparse
from ..automation import CommandSequence, TaskManager
from ..automation.utilities import db_utils
@ -152,9 +151,9 @@ class TestSimpleCommands(OpenWPMTest):
manager = TaskManager.TaskManager(manager_params, browser_params)
# Set up two sequential browse commands to two URLS
cs_a = CommandSequence.CommandSequence(url_a)
cs_a = CommandSequence.CommandSequence(url_a, site_rank=0)
cs_a.browse(num_links=1, sleep=1)
cs_b = CommandSequence.CommandSequence(url_b)
cs_b = CommandSequence.CommandSequence(url_b, site_rank=1)
cs_b.browse(num_links=1, sleep=1)
manager.execute_command_sequence(cs_a)
@ -162,13 +161,16 @@ class TestSimpleCommands(OpenWPMTest):
manager.close()
qry_res = db_utils.query_db(manager_params['db'],
"SELECT site_url FROM site_visits")
"SELECT site_url, site_rank"
" FROM site_visits")
# We had two separate page visits
assert len(qry_res) == 2
assert qry_res[0][0] == url_a
assert qry_res[0][1] == 0
assert qry_res[1][0] == url_b
assert qry_res[1][1] == 1
def test_browse_http_table_valid(self):
"""Check CommandSequence.browse() works and populates http tables correctly.

Просмотреть файл

@ -1,4 +1,3 @@
from __future__ import absolute_import
import pytest

Просмотреть файл

@ -1,18 +1,17 @@
from __future__ import absolute_import, print_function
import os
import socketserver
import threading
from http.server import SimpleHTTPRequestHandler
from os.path import dirname, realpath
from random import choice
from urllib.parse import parse_qs, urlparse
import boto3
import pyarrow.parquet as pq
import s3fs
from botocore.credentials import Credentials
from pyarrow.filesystem import S3FSWrapper # noqa
from six.moves import range, socketserver
from six.moves.SimpleHTTPServer import SimpleHTTPRequestHandler
from six.moves.urllib.parse import parse_qs, urlparse
LOCAL_WEBSERVER_PORT = 8000
BASE_TEST_URL_DOMAIN = "localtest.me"