Mirror of https://github.com/openwpm/OpenWPM.git

Merge branch 'master' into nhnt11-callstacks

This commit is contained in: commit 82eced7a80

Dockerfile (10 lines changed)
@@ -34,17 +34,13 @@ RUN ./install-system.sh --no-flash
 RUN mv firefox-bin /opt/firefox-bin
 ENV FIREFOX_BINARY /opt/firefox-bin/firefox-bin

-# Instead of running install-pip-and-packages.sh, the packages are installed
-# manually using pip and pip3 so that python2 and python3 are supported in the
-# final image.
-RUN apt-get -y install python-pip python3-pip

 # For some reasons, python3-publicsuffix doesn't work with pip3 at the moment,
 # so install it from the ubuntu repository
 RUN apt-get -y install python3-publicsuffix

-COPY requirements.txt .
-RUN pip3 install -U -r requirements.txt
+COPY install-pip-and-packages.sh .
+RUN ./install-pip-and-packages.sh

 COPY --from=extension /usr/src/app/dist/openwpm-*.zip automation/Extension/firefox/openwpm.xpi

@@ -59,5 +55,5 @@ COPY . .
 # possible to run everything as root as well.
 RUN adduser --disabled-password --gecos "OpenWPM" openwpm

-# Alternatively, python3 could be used here
+# Setting demo.py as the default command
 CMD python3 demo.py
README.md (37 lines changed)

@@ -7,10 +7,41 @@ of websites. OpenWPM is built on top of Firefox, with automation provided
 by Selenium. It includes several hooks for data collection. Check out
 the instrumentation section below for more details.

+Table of Contents
+-----------------
+
+* [Installation](#installation)
+* [Quick Start](#quick-start)
+* [Instrumentation and Data Access](#instrumentation-and-data-access)
+  * [Output Formats](#output-format)
+    * [Local Databases](#local-databases)
+    * [Parquet on Amazon S3 (Experimental)](#parquet-on-amazon-s3-experimental)
+* [Browser and Platform Configuration](#browser-and-platform-configuration)
+  * [Browser Configuration Options](#platform-configuration-options)
+  * [Browser Profile Support](#browser-profile-support)
+    * [Stateful vs Stateless crawls](#stateful-vs-stateless-crawls)
+    * [Loading and saving a browser profile](#loading-and-saving-a-browser-profile)
+* [Development pointers](#development-pointers)
+  * [Editing instrumentation](#editing-instrumentation)
+  * [Debugging the platform](#debugging-the-platform)
+  * [Managing requirements](#managing-requirements)
+  * [Running tests](#running-tests)
+* [Mac OSX (Limited support for developers)](#mac-osx-limited-support-for-developers)
+* [Troubleshooting](#troubleshooting)
+* [Docker Deployment for OpenWPM](#docker-deployment-for-openwpm)
+  * [Building the Docker Container](#building-the-docker-container)
+  * [Running Measurements from inside the Container](#running-measurements-from-inside-the-container)
+  * [MacOS GUI applications in Docker](#macos-gui-applications-in-docker)
+* [Disclaimer](#disclaimer)
+* [Citation](#citation)
+* [License](#license)
+
+
 Installation
 ------------

-OpenWPM has been developed and tested on Ubuntu 14.04/16.04. An installation
+OpenWPM is a Python 3 application developed and tested for Ubuntu 18.04.
+Python 2 is not supported. An installation
 script, `install.sh` is included to install both the system and python
 dependencies automatically. A few of the python dependencies require specific
 versions, so you should install the dependencies in a virtual environment if

@@ -419,8 +450,8 @@ OpenWPM should be placed in the former, while those only required to run the
 tests (or perform other development tasks) should be placed in the latter.

 To update dependencies, run the following two commands **in order**:
-* `pip-compile --upgrade requirements.txt`
-* `pip-compile --upgrade requirements-dev.txt`
+* `pip-compile --upgrade requirements.in`
+* `pip-compile --upgrade requirements-dev.in`

 It's important that these are run in order, as we layer the dev
 dependencies on the output of the pinned production dependencies as per
@@ -1,21 +1,19 @@
-from __future__ import absolute_import

 import errno
 import logging
 import os
+import pickle
 import shutil
 import signal
 import sys
 import threading
 import time
 import traceback
+from queue import Empty as EmptyQueue

 import psutil
 from multiprocess import Queue
 from selenium.common.exceptions import WebDriverException
-from six import reraise
-from six.moves import cPickle as pickle
-from six.moves.queue import Empty as EmptyQueue
 from tblib import pickling_support

 from .Commands import command_executor

@@ -126,7 +124,7 @@ class Browser:
             launch_status[result[1]] = True
             return result[2]
         elif result[0] == 'CRITICAL':
-            reraise(*pickle.loads(result[1]))
+            raise pickle.loads(result[1])
         elif result[0] == 'FAILED':
             raise BrowserCrashError(
                 'Browser spawn returned failure status')
@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 from .Errors import CommandExecutionError

@@ -23,7 +22,8 @@ class CommandSequence:
         called prior to that.
     """

-    def __init__(self, url, reset=False, blocking=False, retry_number=None):
+    def __init__(self, url, reset=False,
+                 blocking=False, retry_number=None, site_rank=None):
         """Initialize command sequence.

         Parameters

@@ -37,6 +37,9 @@ class CommandSequence:
         retry_number : int, optional
             Integer denoting the number of attempts that have been made to
             execute this command. Will be saved in `crawl_history`.
+        site_rank : int, optional
+            Integer indicating the ranking of the page to visit, saved
+            to `site_visits`
         """
         self.url = url
         self.reset = reset

@@ -45,6 +48,7 @@ class CommandSequence:
         self.commands_with_timeout = []
         self.total_timeout = 0
         self.contains_get_or_browse = False
+        self.site_rank = site_rank

     def get(self, sleep=0, timeout=60):
         """ goes to a url """
@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import gzip
 import json

@@ -17,7 +16,6 @@ from selenium.common.exceptions import (MoveTargetOutOfBoundsException,
 from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
-from six.moves import range

 from ..SocketInterface import clientsocket
 from .utils.lso import get_flash_cookies

@@ -352,9 +350,8 @@ def recursive_dump_page_source(visit_id, driver, manager_params, suffix=''):
     page_source = dict()
     page_source['doc_url'] = doc_url
     source = driver.page_source
-    import six
-    if type(source) != six.text_type:
-        source = six.text_type(source, 'utf-8')
+    if type(source) != str:
+        source = str(source, 'utf-8')
     page_source['source'] = source
     page_source['iframes'] = dict()
@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 from . import browser_commands, profile_commands
@@ -1,12 +1,10 @@
-from __future__ import absolute_import
-
 import logging
 import os
+import pickle
 import shutil
 import tarfile

-from six.moves import cPickle as pickle
-
 from ..Errors import ProfileLoadError
 from .utils.file_utils import rmsubtree
 from .utils.firefox_profile import sleep_until_sqlite_checkpoint
@@ -4,13 +4,11 @@
 #
 # Steven Englehardt (github.com/englehardt)

-from __future__ import absolute_import, print_function

 import re

 import bs4
 from bs4 import BeautifulSoup as bs
-from six.moves import range


 def is_clickable(xpath):

@@ -150,7 +148,7 @@ def xp1_wildcard(attr, string, normalize=True):
 def main():
     # Output some sample XPaths
     print("--- Sample XPaths ---")
-    from six.moves.urllib.request import urlopen
+    from urllib.request import urlopen
     import re
     from random import choice
     rsp = urlopen('http://www.reddit.com/')
@@ -1,5 +1,4 @@
 # A collection of file utilities
-from __future__ import absolute_import

 import os
 import shutil
@@ -1,6 +1,5 @@
 # This is code adapted from KU Leuven crawler code written by
 # Gunes Acar and Marc Juarez
-from __future__ import absolute_import, print_function

 import os
 import time
@@ -1,7 +1,6 @@
 # This is code adapted from KU Leuven crawler code written by
 # Gunes Acar and Marc Juarez

-from __future__ import absolute_import, print_function

 import fnmatch
 import os

@@ -14,11 +13,10 @@ from miniamf import sol

 def ensure_unicode(val):
     """Coerce VAL to a Unicode string by any means necessary."""
-    import six
-    if isinstance(val, six.text_type):
+    if isinstance(val, str):
         return val
-    if not isinstance(val, six.binary_type):
-        return six.text_type(val)
+    if not isinstance(val, bytes):
+        return str(val)
     try:
         return val.decode("utf-8", "backslashescape")
     except (UnicodeDecodeError, TypeError):

@@ -52,9 +50,8 @@ class FlashCookie(_BaseFlashCookie):


 def parse_flash_cookies(lso_file):
-    import six
     lso_dict = sol.load(lso_file)
-    return [FlashCookie(lso_file, k, v) for k, v in six.iteritems(lso_dict)]
+    return [FlashCookie(lso_file, k, v) for k, v in iter(lso_dict.items())]


 def gen_find_files(filepat, top):
@@ -1,11 +1,11 @@
 # A set of extensions to the functions normally provided by the selenium
 # webdriver. These are primarily for parsing and searching.

-from __future__ import absolute_import

 import random
 import re
 import time
+from urllib import parse as urlparse

 from selenium.common.exceptions import (ElementNotVisibleException,
                                         NoSuchElementException,

@@ -14,7 +14,6 @@ from selenium.common.exceptions import (ElementNotVisibleException,
 from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
-from six.moves.urllib import parse as urlparse

 from ...utilities import domain_utils as du
 from . import XPathUtil
@@ -1,10 +1,10 @@
 import abc
 import logging
+import queue
 import threading
 import time

 from multiprocess import Queue
-from six.moves import queue

 from ..SocketInterface import serversocket
 from ..utilities.multiprocess_utils import Process

@@ -19,10 +19,11 @@ STATUS_UPDATE_INTERVAL = 5  # seconds
 class BaseListener(object):
     """Base class for the data aggregator listener process. This class is used
     alongside the BaseAggregator class to spawn an aggregator process that
-    combines data collected in multiple crawl processes and write it to disk as
-    specified in the child class. The BaseListener class is instantiated in the
-    remote process, and sets up a listening socket to receive data. Classes
-    which inherit from this base class define how that data is written to disk.
+    combines data collected in multiple crawl processes and stores it
+    persistently as specified in the child class. The BaseListener class
+    is instantiated in the remote process, and sets up a listening socket to
+    receive data. Classes which inherit from this base class define
+    how that data is written to disk.

     Parameters
     ----------
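The docstring above describes the aggregation architecture: a dedicated listener process receives records from many crawl processes and persists them however the child class decides. A rough, self-contained illustration of that producer/listener split (names and the queue transport here are invented for the sketch; OpenWPM itself uses a listening socket, per the docstring):

```python
import multiprocessing


def listener(record_queue):
    """Drain records sent by crawl processes and persist them."""
    while True:
        record = record_queue.get()
        if record is None:  # sentinel: shut down
            break
        table, data = record
        # A concrete listener would write `data` to SQLite, Parquet, etc.
        print("would persist into %s: %r" % (table, data))


if __name__ == "__main__":
    q = multiprocessing.Queue()
    proc = multiprocessing.Process(target=listener, args=(q,))
    proc.start()
    q.put(("site_visits", {"visit_id": 1, "site_url": "http://example.com"}))
    q.put(None)
    proc.join()
```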
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, print_function

 import base64
 import json

@@ -9,8 +8,6 @@ from sqlite3 import (IntegrityError, InterfaceError, OperationalError,
                      ProgrammingError)

 import plyvel
-import six
-from six.moves import range

 from .BaseAggregator import RECORD_TYPE_CONTENT, BaseAggregator, BaseListener

@@ -102,10 +99,10 @@ class LocalListener(BaseListener):
             statement, args = self._generate_insert(
                 table=record[0], data=record[1])
             for i in range(len(args)):
-                if isinstance(args[i], six.binary_type):
-                    args[i] = six.text_type(args[i], errors='ignore')
+                if isinstance(args[i], bytes):
+                    args[i] = str(args[i], errors='ignore')
                 elif callable(args[i]):
-                    args[i] = six.text_type(args[i])
+                    args[i] = str(args[i])
                 elif type(args[i]) == dict:
                     print(args[i])
                     args[i] = json.dumps(args[i])
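The loop above normalizes each value before it is bound as a SQLite parameter, since `sqlite3` only accepts `str`, `bytes`, numbers, and `None`. A hedged sketch of the same coercions in isolation (`normalize` is my own helper for illustration, not OpenWPM code):

```python
import json
import sqlite3


def normalize(value):
    """Coerce a value into something sqlite3 can bind, mirroring the
    bytes/callable/dict cases handled in the loop above."""
    if isinstance(value, bytes):
        return str(value, errors='ignore')
    if callable(value):
        return str(value)
    if isinstance(value, dict):
        return json.dumps(value)
    return value


conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE t (v TEXT)")
conn.execute("INSERT INTO t VALUES (?)", (normalize({"a": 1}),))
print(conn.execute("SELECT v FROM t").fetchone())  # ('{"a": 1}',)
```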
@@ -1,11 +1,12 @@
-from __future__ import absolute_import, print_function
-
 import base64
 import gzip
 import hashlib
+import io
 import json
+import queue
+import random
 import time
 import uuid
 from collections import defaultdict

 import boto3

@@ -13,11 +14,9 @@ import pandas as pd
 import pyarrow as pa
 import pyarrow.parquet as pq
 import s3fs
-import six
 from botocore.client import Config
 from botocore.exceptions import ClientError, EndpointConnectionError
 from pyarrow.filesystem import S3FSWrapper  # noqa
-from six.moves import queue

 from .BaseAggregator import RECORD_TYPE_CONTENT, BaseAggregator, BaseListener
 from .parquet_schema import PQ_SCHEMAS

@@ -174,15 +173,15 @@ class S3Listener(BaseListener):
             self.logger.debug(
                 "File `%s` already exists on s3, skipping..." % filename)
             return
-        if not isinstance(string, six.binary_type):
+        if not isinstance(string, bytes):
             string = string.encode('utf-8')
         if compressed:
-            out_f = six.BytesIO()
+            out_f = io.BytesIO()
            with gzip.GzipFile(fileobj=out_f, mode='w') as writer:
                writer.write(string)
            out_f.seek(0)
        else:
-            out_f = six.BytesIO(string)
+            out_f = io.BytesIO(string)

         # Upload to S3
         try:

@@ -206,7 +205,7 @@ class S3Listener(BaseListener):
                 continue
             if table_name == SITE_VISITS_INDEX:
                 out_str = '\n'.join([json.dumps(x) for x in batches])
-                if not isinstance(out_str, six.binary_type):
+                if not isinstance(out_str, bytes):
                     out_str = out_str.encode('utf-8')
                 fname = '%s/site_index/instance-%s-%s.json.gz' % (
                     self.dir, self._instance_id,

@@ -287,10 +286,10 @@ class S3Listener(BaseListener):

         # Convert data to text type
         for k, v in data.items():
-            if isinstance(v, six.binary_type):
-                data[k] = six.text_type(v, errors='ignore')
+            if isinstance(v, bytes):
+                data[k] = str(v, errors='ignore')
             elif callable(v):
-                data[k] = six.text_type(v)
+                data[k] = str(v)
             # TODO: Can we fix this in the extension?
             elif type(v) == dict:
                 data[k] = json.dumps(v)

@@ -343,7 +342,7 @@ class S3Aggregator(BaseAggregator):
         self.dir = manager_params['s3_directory']
         self.bucket = manager_params['s3_bucket']
         self.s3 = boto3.client('s3')
-        self._instance_id = (uuid.uuid4().int & (1 << 32) - 1) - 2**31
+        self._instance_id = random.getrandbits(32)
         self._create_bucket()

     def _create_bucket(self):

@@ -368,13 +367,13 @@ class S3Aggregator(BaseAggregator):
         # Config parameters for update
         out = dict()
         out['manager_params'] = self.manager_params
-        out['openwpm_version'] = six.text_type(openwpm_version)
-        out['browser_version'] = six.text_type(browser_version)
+        out['openwpm_version'] = str(openwpm_version)
+        out['browser_version'] = str(browser_version)
         out['browser_params'] = self.browser_params
         out_str = json.dumps(out)
-        if not isinstance(out_str, six.binary_type):
+        if not isinstance(out_str, bytes):
             out_str = out_str.encode('utf-8')
-        out_f = six.BytesIO(out_str)
+        out_f = io.BytesIO(out_str)

         # Upload to S3 and delete local copy
         try:

@@ -384,22 +383,22 @@ class S3Aggregator(BaseAggregator):
             raise

     def get_next_visit_id(self):
-        """Generate visit id as randomly generated 53bit UUIDs.
+        """Generate visit id as randomly generated positive integer less than 2^53.

         Parquet can support integers up to 64 bits, but Javascript can only
         represent integers up to 53 bits:
         https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER
         Thus, we cap these values at 53 bits.
         """
-        return (uuid.uuid4().int & (1 << 53) - 1) - 2**52
+        return random.getrandbits(53)

     def get_next_crawl_id(self):
-        """Generate crawl id as randomly generated 32bit UUIDs
+        """Generate crawl id as randomly generated positive 32bit integer

         Note: Parquet's partitioned dataset reader only supports integer
         partition columns up to 32 bits.
         """
-        return (uuid.uuid4().int & (1 << 32) - 1) - 2**31
+        return random.getrandbits(32)

     def launch(self):
         """Launch the aggregator listener process"""
@@ -5,17 +5,18 @@ PQ_SCHEMAS = dict()

 # site_visits
 fields = [
     pa.field('visit_id', pa.int64(), nullable=False),
-    pa.field('crawl_id', pa.int32(), nullable=False),
-    pa.field('instance_id', pa.int32(), nullable=False),
-    pa.field('site_url', pa.string(), nullable=False)
+    pa.field('crawl_id', pa.uint32(), nullable=False),
+    pa.field('instance_id', pa.uint32(), nullable=False),
+    pa.field('site_url', pa.string(), nullable=False),
+    pa.field('site_rank', pa.uint32())
 ]
 PQ_SCHEMAS['site_visits'] = pa.schema(fields)

 # flash_cookies
 fields = [
-    pa.field('crawl_id', pa.int32(), nullable=False),
+    pa.field('crawl_id', pa.uint32(), nullable=False),
     pa.field('visit_id', pa.int64(), nullable=False),
-    pa.field('instance_id', pa.int32(), nullable=False),
+    pa.field('instance_id', pa.uint32(), nullable=False),
     pa.field('domain', pa.string()),
     pa.field('filename', pa.string()),
     pa.field('local_path', pa.string()),

@@ -26,9 +27,9 @@ PQ_SCHEMAS['flash_cookies'] = pa.schema(fields)

 # crawl_history
 fields = [
-    pa.field('crawl_id', pa.int32(), nullable=False),
+    pa.field('crawl_id', pa.uint32(), nullable=False),
     pa.field('visit_id', pa.int64(), nullable=False),
-    pa.field('instance_id', pa.int32(), nullable=False),
+    pa.field('instance_id', pa.uint32(), nullable=False),
     pa.field('command', pa.string()),
     pa.field('arguments', pa.string()),
     pa.field('retry_number', pa.int8()),

@@ -41,9 +42,9 @@ PQ_SCHEMAS['crawl_history'] = pa.schema(fields)
 # http_requests
 fields = [
     pa.field('incognito', pa.int32()),
-    pa.field('crawl_id', pa.int32()),
+    pa.field('crawl_id', pa.uint32()),
     pa.field('visit_id', pa.int64()),
-    pa.field('instance_id', pa.int32(), nullable=False),
+    pa.field('instance_id', pa.uint32(), nullable=False),
     pa.field('extension_session_uuid', pa.string()),
     pa.field('event_ordinal', pa.int64()),
     pa.field('window_id', pa.int64()),

@@ -76,9 +77,9 @@ PQ_SCHEMAS['http_requests'] = pa.schema(fields)
 # http_responses
 fields = [
     pa.field('incognito', pa.int32()),
-    pa.field('crawl_id', pa.int32()),
+    pa.field('crawl_id', pa.uint32()),
     pa.field('visit_id', pa.int64()),
-    pa.field('instance_id', pa.int32(), nullable=False),
+    pa.field('instance_id', pa.uint32(), nullable=False),
     pa.field('extension_session_uuid', pa.string()),
     pa.field('event_ordinal', pa.int64()),
     pa.field('window_id', pa.int64()),

@@ -100,9 +101,9 @@ PQ_SCHEMAS['http_responses'] = pa.schema(fields)
 # http_redirects
 fields = [
     pa.field('incognito', pa.int32()),
-    pa.field('crawl_id', pa.int32()),
+    pa.field('crawl_id', pa.uint32()),
     pa.field('visit_id', pa.int64()),
-    pa.field('instance_id', pa.int32(), nullable=False),
+    pa.field('instance_id', pa.uint32(), nullable=False),
     pa.field('old_request_url', pa.string()),
     pa.field('old_request_id', pa.string()),
     pa.field('new_request_url', pa.string()),

@@ -121,9 +122,9 @@ PQ_SCHEMAS['http_redirects'] = pa.schema(fields)
 # javascript
 fields = [
     pa.field('incognito', pa.int32()),
-    pa.field('crawl_id', pa.int32()),
+    pa.field('crawl_id', pa.uint32()),
     pa.field('visit_id', pa.int64()),
-    pa.field('instance_id', pa.int32(), nullable=False),
+    pa.field('instance_id', pa.uint32(), nullable=False),
     pa.field('extension_session_uuid', pa.string()),
     pa.field('event_ordinal', pa.int64()),
     pa.field('page_scoped_event_ordinal', pa.int64()),

@@ -148,9 +149,9 @@ PQ_SCHEMAS['javascript'] = pa.schema(fields)

 # javascript_cookies
 fields = [
-    pa.field('crawl_id', pa.int32()),
+    pa.field('crawl_id', pa.uint32()),
     pa.field('visit_id', pa.int64()),
-    pa.field('instance_id', pa.int32(), nullable=False),
+    pa.field('instance_id', pa.uint32(), nullable=False),
     pa.field('extension_session_uuid', pa.string()),
     pa.field('event_ordinal', pa.int64()),
     pa.field('record_type', pa.string()),

@@ -174,9 +175,9 @@ PQ_SCHEMAS['javascript_cookies'] = pa.schema(fields)
 # navigations
 fields = [
     pa.field('incognito', pa.int32()),
-    pa.field('crawl_id', pa.int32()),
+    pa.field('crawl_id', pa.uint32()),
     pa.field('visit_id', pa.int64()),
-    pa.field('instance_id', pa.int32(), nullable=False),
+    pa.field('instance_id', pa.uint32(), nullable=False),
     pa.field('extension_session_uuid', pa.string()),
     pa.field('process_id', pa.int64()),
     pa.field('window_id', pa.int64()),
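Every `crawl_id` and `instance_id` column above moves from `pa.int32()` to `pa.uint32()`, which matches the new `random.getrandbits(32)` IDs: those can exceed a signed 32-bit column's maximum of 2**31 - 1. A small sketch of the distinction (illustrative schema fragment only, not the full table):

```python
import pyarrow as pa

max_unsigned = 2**32 - 1          # largest getrandbits(32) result
assert max_unsigned > 2**31 - 1   # would overflow pa.int32()

schema = pa.schema([
    pa.field('visit_id', pa.int64(), nullable=False),
    pa.field('crawl_id', pa.uint32(), nullable=False),
    pa.field('site_rank', pa.uint32()),  # nullable: rank may be absent
])
table = pa.table(
    {'visit_id': [1], 'crawl_id': [max_unsigned], 'site_rank': [None]},
    schema=schema,
)
print(table.schema)
```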
@@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS site_visits (
     visit_id INTEGER PRIMARY KEY,
     crawl_id INTEGER NOT NULL,
     site_url VARCHAR(500) NOT NULL,
+    site_rank INTEGER,
     FOREIGN KEY(crawl_id) REFERENCES crawl(id));

 /*
@@ -1,6 +1,5 @@
 """ Set prefs and load extensions in Firefox """

-from __future__ import absolute_import, print_function

 import os
 import shutil

@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 from ..Errors import BrowserConfigError
 from . import deploy_firefox
@@ -1,11 +1,9 @@
-from __future__ import absolute_import

 import json
 import logging
 import os.path
 import random

-import six
 from selenium import webdriver

 from ..Commands.profile_commands import load_profile

@@ -120,7 +118,7 @@ def deploy_firefox(status_queue, browser_params, manager_params,
     fo.add_argument('--height={}'.format(DEFAULT_SCREEN_RES[1]))

     if browser_params['save_content']:
-        if isinstance(browser_params['save_content'], six.string_types):
+        if isinstance(browser_params['save_content'], str):
             configured_types = set(browser_params['save_content'].split(','))
             if not configured_types.issubset(ALL_RESOURCE_TYPES):
                 diff = configured_types.difference(ALL_RESOURCE_TYPES)
@@ -2,7 +2,6 @@
 Workarounds for Selenium headaches.
 """

-from __future__ import absolute_import

 import errno
 import json

@@ -20,7 +19,6 @@ from selenium.webdriver.firefox.firefox_profile import AddonFormatError
 from selenium.webdriver.firefox.firefox_profile import \
     FirefoxProfile as BaseFirefoxProfile
 from selenium.webdriver.firefox.options import Options
-from six.moves import range

 __all__ = ['FirefoxBinary', 'FirefoxProfile', 'FirefoxLogInterceptor',
            'Options']
@@ -0,0 +1 @@
+registry=https://registry.npmjs.org/
@@ -8,19 +8,19 @@
   },
   "devDependencies": {
     "eslint": "^5.16.0",
-    "eslint-plugin-import": "^2.17.3",
+    "eslint-plugin-import": "^2.19.1",
     "eslint-plugin-json": "^1.4.0",
     "eslint-plugin-mozilla": "^0.14.0",
     "eslint-plugin-no-unsanitized": "^3.0.2",
     "npm-run-all": "^4.1.1",
     "ts-loader": "^5.4.5",
-    "tslint": "^5.17.0",
+    "tslint": "^5.20.1",
     "tslint-eslint-rules": "^5.4.0",
-    "typescript": "^3.5.1",
+    "typescript": "^3.7.3",
     "typescript-eslint-parser": "^20.0.0",
-    "web-ext": "^3.1.1",
-    "webpack": "^4.33.0",
-    "webpack-cli": "^3.3.4"
+    "web-ext": "^3.2.1",
+    "webpack": "^4.41.3",
+    "webpack-cli": "^3.3.10"
   },
   "engines": {
     "node": ">=8.11.1"
@@ -0,0 +1 @@
+registry=https://registry.npmjs.org/
@@ -62,22 +62,22 @@
   "devDependencies": {
     "@types/firefox-webext-browser": "^63.0.0",
     "ava": "1.0.0-beta.7",
-    "codecov": "^3.1.0",
+    "codecov": "^3.6.1",
     "commitizen": "^4.0.3",
     "cz-conventional-changelog": "^2.1.0",
-    "gh-pages": "^2.0.0",
+    "gh-pages": "^2.1.1",
     "npm-run-all": "^4.1.5",
     "nyc": "^14.1.1",
    "opn-cli": "^3.1.0",
-    "prettier": "^1.14.3",
+    "prettier": "^1.19.1",
     "publish-please": "^5.5.1",
     "standard-version": "github:conventional-changelog/standard-version#master",
     "trash-cli": "^1.4.0",
-    "tslint": "^5.11.0",
+    "tslint": "^5.20.1",
     "tslint-config-prettier": "^1.15.0",
     "tslint-immutable": "^4.7.0",
-    "typedoc": "^0.15.0",
-    "typescript": "^3.0.3"
+    "typedoc": "^0.15.5",
+    "typescript": "^3.7.3"
   },
   "ava": {
     "failFast": true,
@@ -589,9 +589,13 @@ export function jsInstruments(event_id, sendMessagesToLogger) {
     // Store original descriptor in closure
     const propDesc = Object.getPropertyDescriptor(object, propertyName);

+    // Property descriptor must exist unless we are instrumenting a
+    // non-existing property
     if (
       !propDesc &&
-      logSettings.nonExistingPropertiesToInstrument.indexOf(propertyName) == -1
+      (!logSettings.nonExistingPropertiesToInstrument ||
+        logSettings.nonExistingPropertiesToInstrument.indexOf(propertyName) ==
+          -1)
     ) {
       console.error(
         "Property descriptor not found for",
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, print_function

 import copy
 import json

@@ -10,13 +9,12 @@ import struct
 import sys
 import threading
 import time
+from queue import Empty as EmptyQueue

 import dill
 import sentry_sdk
-import six
 from multiprocess import JoinableQueue
 from sentry_sdk.integrations.logging import BreadcrumbHandler, EventHandler
-from six.moves.queue import Empty as EmptyQueue
 from tblib import pickling_support

 from .Commands.utils.webdriver_utils import parse_neterror

@@ -111,6 +109,7 @@ class MPLogger(object):
         # Configure log handlers
         self._status_queue = JoinableQueue()
         self._log_file = os.path.expanduser(log_file)
+
         self._initialize_loggers()

         # Configure sentry (if available)

@@ -266,9 +265,9 @@ class MPLogger(object):
         and those sent to Sentry.
         """
         if obj['exc_info']:
-            obj['exc_info'] = dill.loads(six.ensure_str(obj['exc_info']))
+            obj['exc_info'] = dill.loads(obj['exc_info'])
         if obj['args']:
-            obj['args'] = dill.loads(six.ensure_str(obj['args']))
+            obj['args'] = dill.loads(obj['args'])
         record = logging.makeLogRecord(obj)
         self._file_handler.emit(record)
         if self._sentry_dsn:
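These records arrive from other processes, so `exc_info` and `args` cross the process boundary as dill-serialized bytes; the `six.ensure_str` wrapper is dropped along with Python 2. A rough sketch of the round-trip this relies on (assuming `tblib.pickling_support` has been installed, as this module arranges elsewhere):

```python
import sys

import dill
import tblib.pickling_support

# Make traceback objects picklable, as MPLogger arranges via tblib.
tblib.pickling_support.install()

try:
    1 / 0
except ZeroDivisionError:
    payload = dill.dumps(sys.exc_info())  # bytes, safe to ship over a queue

exc_type, exc_value, tb = dill.loads(payload)
print(exc_type.__name__, exc_value)  # ZeroDivisionError division by zero
```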
@@ -1,19 +1,11 @@
-from __future__ import absolute_import, print_function
-
 import json
 import socket
 import struct
 import threading
 import traceback
+from queue import Queue

 import dill
-import six
-from six.moves import input
-from six.moves.queue import Queue
-
-if six.PY2:
-    class ConnectionAbortedError(Exception):
-        pass

 # TODO - Implement a cleaner shutdown for server socket
 # see: https://stackoverflow.com/a/1148237

@@ -142,10 +134,9 @@ class clientsocket:
         using dill if not string, and prepends msg len (4-bytes) and
         serialization type (1-byte).
         """
-        import six
-        if isinstance(msg, six.binary_type):
+        if isinstance(msg, bytes):
             serialization = b'n'
-        elif isinstance(msg, six.text_type):
+        elif isinstance(msg, str):
             serialization = b'u'
             msg = msg.encode('utf-8')
         elif self.serialization == 'dill':
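The docstring spells out the wire format: a 4-byte message length, a 1-byte serialization tag, then the payload. A minimal sketch of that framing (my own helper; big-endian length assumed, as `struct`'s `>L` gives, and the `'j'` JSON fallback here is illustrative — the real class also supports dill):

```python
import json
import struct


def frame(msg):
    """Prefix a message with its length (4 bytes) and a serialization
    tag (1 byte), per the format described above."""
    if isinstance(msg, bytes):
        tag, payload = b'n', msg                  # 'n': raw bytes
    elif isinstance(msg, str):
        tag, payload = b'u', msg.encode('utf-8')  # 'u': utf-8 text
    else:
        tag, payload = b'j', json.dumps(msg).encode('utf-8')
    return struct.pack('>Lc', len(payload), tag) + payload


framed = frame("hello")
length, tag = struct.unpack('>Lc', framed[:5])
assert length == 5 and tag == b'u' and framed[5:] == b'hello'
```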
@@ -1,19 +1,16 @@
-from __future__ import absolute_import, division

 import copy
 import json
 import logging
 import os
+import pickle
 import threading
 import time
 import traceback
+from queue import Empty as EmptyQueue

 import psutil
 import tblib
-from six import reraise
-from six.moves import cPickle as pickle
-from six.moves import range
-from six.moves.queue import Empty as EmptyQueue

 from . import CommandSequence, MPLogger
 from .BrowserManager import Browser

@@ -301,7 +298,7 @@ class TaskManager:
                 "failure limit.", self.failure_status['CommandSequence']
             )
             if self.failure_status['ErrorType'] == 'CriticalChildException':
-                reraise(*pickle.loads(self.failure_status['Exception']))
+                raise pickle.loads(self.failure_status['Exception'])

     # CRAWLER COMMAND CODE

@@ -401,7 +398,8 @@ class TaskManager:
         self.sock.send(("site_visits", {
             "visit_id": browser.curr_visit_id,
            "crawl_id": browser.crawl_id,
-            "site_url": command_sequence.url
+            "site_url": command_sequence.url,
+            "site_rank": command_sequence.site_rank
         }))

         # Start command execution thread
@@ -469,6 +467,7 @@ class TaskManager:
             if status == "OK":
                 command_status = 'ok'
             elif status[0] == "CRITICAL":
+                command_status = 'critical'
                 self.logger.critical(
                     "BROWSER %i: Received critical error from browser "
                     "process while executing command %s. Setting failure "
@@ -210,15 +210,12 @@ Finis.

 # Import our required modules
 #
-from __future__ import absolute_import
-
 import re
 #
 import string
 import warnings

-from six.moves import map, range
-from six.moves.cPickle import dumps, loads
+from pickle import dumps, loads

 __all__ = ["CookieError", "BaseCookie", "SimpleCookie", "SerialCookie",
            "SmartCookie", "Cookie"]
@@ -1,12 +1,11 @@
-from __future__ import absolute_import, print_function

 import json
 import os
 import sqlite3
 import time
+from urllib.parse import urlparse

 from netlib.odict import ODictCaseless
-from six.moves.urllib.parse import urlparse

 # This should be the modified Cookie.py included
 # the standard lib Cookie.py has many bugs

@@ -24,14 +23,13 @@ def encode_to_unicode(string):
     Encode from UTF-8/ISO-8859-1 to Unicode.
     Ignore errors if both of these don't work
     """
-    import six
     try:
-        encoded = six.text_type(string, 'UTF-8')
+        encoded = str(string, 'UTF-8')
     except UnicodeDecodeError:
         try:
-            encoded = six.text_type(string, 'ISO-8859-1')
+            encoded = str(string, 'ISO-8859-1')
         except UnicodeDecodeError:
-            encoded = six.text_type(string, 'UTF-8', errors='ignore')
+            encoded = str(string, 'UTF-8', errors='ignore')
     return encoded

@@ -122,9 +120,8 @@ def parse_cookies(cookie_string, verbose, url=None, response_cookie=False):
     """
     queries = list()
     attrs = ()
-    import six
     try:
-        if type(cookie_string) == six.text_type:
+        if type(cookie_string) == str:
             cookie_string = cookie_string.encode('utf-8')
         cookie = Cookie.BaseCookie(cookie_string)
         for key in cookie.keys():
@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import os
 import sqlite3
@@ -1,14 +1,12 @@
-from __future__ import absolute_import, print_function

 import codecs
 import os
 import tempfile
 from functools import wraps
 from ipaddress import ip_address
+from urllib.parse import urlparse

 from publicsuffix import PublicSuffixList, fetch
-from six.moves import range
-from six.moves.urllib.parse import urlparse

 # We cache the Public Suffix List in temp directory
 PSL_CACHE_LOC = os.path.join(tempfile.gettempdir(), 'public_suffix_list.dat')

@@ -45,9 +43,8 @@ def is_ip_address(hostname):
     """
     Check if the given string is a valid IP address
     """
-    import six
     try:
-        ip_address(six.text_type(hostname))
+        ip_address(str(hostname))
         return True
     except ValueError:
         return False
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, print_function

 import json
 import os

@@ -89,13 +88,11 @@ def get_version():
         openwpm = f.readline().strip()

     firefox_binary_path = get_firefox_binary_path()
-    import six
     try:
         firefox = subprocess.check_output([firefox_binary_path, "--version"])
     except subprocess.CalledProcessError as e:
-        six.raise_from(
-            RuntimeError("Firefox not found. Did you run `./install.sh`?"),
-            e)
+        raise RuntimeError("Firefox not found. "
+                           " Did you run `./install.sh`?") from e

     ff = firefox.split()[-1]
     return openwpm, ff
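`six.raise_from` was the Python 2 compatible spelling of exception chaining; the native `raise ... from e` keeps the original `CalledProcessError` reachable as `__cause__`. A small self-contained sketch of the same pattern (the binary name is an example, not OpenWPM's resolved path):

```python
import subprocess


def firefox_version(binary="firefox"):
    try:
        out = subprocess.check_output([binary, "--version"])
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # py3-native chaining: `e` is preserved as __cause__
        raise RuntimeError("Firefox not found. Did you run ./install.sh?") from e
    return out.split()[-1]


try:
    firefox_version("definitely-not-firefox")
except RuntimeError as err:
    print(err, "| caused by:", type(err.__cause__).__name__)
```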
@@ -1,11 +1,10 @@
-from __future__ import absolute_import

+import json
 import os
 import time

 import boto3
 import sentry_sdk
-from six.moves import range

 from automation import CommandSequence, MPLogger, TaskManager
 from automation.utilities import rediswq

@@ -22,6 +21,7 @@ NAVIGATION_INSTRUMENT = os.getenv('NAVIGATION_INSTRUMENT', '1') == '1'
 JS_INSTRUMENT = os.getenv('JS_INSTRUMENT', '1') == '1'
 JS_INSTRUMENT_MODULES = os.getenv('JS_INSTRUMENT_MODULES', None)
 SAVE_CONTENT = os.getenv('SAVE_CONTENT', '')
+PREFS = os.getenv('PREFS', None)
 DWELL_TIME = int(os.getenv('DWELL_TIME', '10'))
 TIMEOUT = int(os.getenv('TIMEOUT', '60'))
 SENTRY_DSN = os.getenv('SENTRY_DSN', None)

@@ -49,6 +49,8 @@ for i in range(NUM_BROWSERS):
         browser_params[i]['save_content'] = False
     else:
         browser_params[i]['save_content'] = SAVE_CONTENT
+    if PREFS:
+        browser_params[i]['prefs'] = json.loads(PREFS)
     browser_params[i]['headless'] = True

 # Manager configuration

@@ -89,6 +91,7 @@ if SENTRY_DSN:
             scope.set_tag('CRAWL_REFERENCE', '%s/%s' %
                           (S3_BUCKET, CRAWL_DIRECTORY))
             # context adds addition information that may be of interest
+            scope.set_context("PREFS", PREFS)
             scope.set_context("crawl_config", {
                 'REDIS_QUEUE_NAME': REDIS_QUEUE_NAME,
             })
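The new `PREFS` hook lets a deployment pass arbitrary Firefox preferences into each browser as a JSON object in the environment. A hedged sketch of how that would be set and parsed (the preference name below is just an example):

```python
import json
import os

# e.g. the container is launched with:
#   PREFS='{"privacy.trackingprotection.enabled": true}'
os.environ.setdefault(
    'PREFS', '{"privacy.trackingprotection.enabled": true}')

PREFS = os.getenv('PREFS', None)
prefs = json.loads(PREFS) if PREFS else {}
print(prefs)  # {'privacy.trackingprotection.enabled': True}
```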
demo.py (3 lines changed)

@@ -1,6 +1,3 @@
-from __future__ import absolute_import
-
-from six.moves import range

 from automation import CommandSequence, TaskManager
@@ -2,4 +2,4 @@
 set -e

 # Dependencies for OpenWPM development -- NOT needed to run the platform.
-pip install --user -U -r requirements-dev.txt
+pip3 install --user -U -r requirements-dev.txt
@@ -31,8 +31,8 @@ brew install node || true
 # Use the Unbranded build that corresponds to a specific Firefox version (source: https://wiki.mozilla.org/Add-ons/Extension_Signing#Unbranded_Builds)
 brew install wget || true

-UNBRANDED_FF70_RELEASE_MAC_BUILD="https://queue.taskcluster.net/v1/task/Kclp8treRdORNcAmrNLgJg/runs/0/artifacts/public/build/target.dmg"
-wget "$UNBRANDED_FF70_RELEASE_MAC_BUILD"
+UNBRANDED_FF71_RELEASE_MAC_BUILD="https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/LQgnuH1-R8a31vCSFufr2g/runs/0/artifacts/public/build/target.dmg"
+wget "$UNBRANDED_FF71_RELEASE_MAC_BUILD"
 # Install Firefox Nightly
 rm -rf Nightly.app || true
 hdiutil attach -nobrowse -mountpoint /Volumes/firefox-tmp target.dmg

@@ -42,7 +42,7 @@ rm target.dmg

 # Selenium 3.3+ requires a 'geckodriver' helper executable, which is not yet
 # packaged.
-GECKODRIVER_VERSION=0.24.0
+GECKODRIVER_VERSION=0.26.0
 GECKODRIVER_ARCH=macos

 wget https://github.com/mozilla/geckodriver/releases/download/v${GECKODRIVER_VERSION}/geckodriver-v${GECKODRIVER_VERSION}-${GECKODRIVER_ARCH}.tar.gz
@@ -39,7 +39,7 @@ if [ "$flash" = true ]; then
 fi
 sudo apt-get update

-sudo apt-get install -y firefox htop git python-dev libxml2-dev libxslt-dev libffi-dev libssl-dev build-essential libboost-python-dev libleveldb-dev libjpeg-dev curl wget git bash vim
+sudo apt-get install -y firefox htop git libxml2-dev libxslt-dev libffi-dev libssl-dev build-essential libboost-python-dev libleveldb-dev libjpeg-dev curl wget git bash vim

 # For some versions of ubuntu, the package libleveldb1v5 isn't available. Use libleveldb1 instead.
 sudo apt-get install -y libleveldb1v5 || sudo apt-get install -y libleveldb1

@@ -49,8 +49,8 @@ if [ "$flash" = true ]; then
 fi

 # Use the Unbranded build that corresponds to a specific Firefox version (source: https://wiki.mozilla.org/Add-ons/Extension_Signing#Unbranded_Builds)
-UNBRANDED_FF70_RELEASE_LINUX_BUILD="https://queue.taskcluster.net/v1/task/S9x3bLGUQTOAUfopXQGLzg/runs/0/artifacts/public/build/target.tar.bz2"
-wget "$UNBRANDED_FF70_RELEASE_LINUX_BUILD"
+UNBRANDED_FF71_RELEASE_LINUX_BUILD="https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/QKKKcc7VQhq8ngUotlj6hA/runs/0/artifacts/public/build/target.tar.bz2"
+wget "$UNBRANDED_FF71_RELEASE_LINUX_BUILD"
 tar jxf target.tar.bz2
 rm -rf firefox-bin
 mv firefox firefox-bin

@@ -58,7 +58,7 @@ rm target.tar.bz2

 # Selenium 3.3+ requires a 'geckodriver' helper executable, which is not yet
 # packaged.
-GECKODRIVER_VERSION=0.24.0
+GECKODRIVER_VERSION=0.26.0
 case $(uname -m) in
     (x86_64)
         GECKODRIVER_ARCH=linux64
@@ -7,41 +7,41 @@
 airspeed==0.5.10 # via localstack
 amazon_kclpy-ext==1.5.1 # via localstack
 argparse==1.4.0 # via amazon-kclpy-ext
 asn1crypto==1.0.1 # via cryptography
-attrs==19.2.0 # via jsonschema
+attrs==19.3.0 # via jsonschema
 autopep8==1.4.4
-aws-sam-translator==1.15.0 # via cfn-lint
+aws-sam-translator==1.15.1 # via cfn-lint
 aws-xray-sdk==2.4.2 # via moto-ext
-awscli==1.16.254 # via localstack
-boto3==1.9.244 # via aws-sam-translator, localstack, localstack-client, moto-ext
+awscli==1.16.289 # via localstack
+boto3==1.10.25 # via aws-sam-translator, localstack, localstack-client, moto-ext
 boto==2.49.0 # via amazon-kclpy-ext, localstack, moto-ext
-botocore==1.12.244 # via aws-xray-sdk, awscli, boto3, localstack, moto-ext, s3transfer
+botocore==1.13.25 # via aws-xray-sdk, awscli, boto3, localstack, moto-ext
 cachetools==3.1.1 # via airspeed
 certifi==2019.9.11 # via requests
-cffi==1.12.3 # via cryptography
-cfn-lint==0.24.4 # via moto-ext
+cffi==1.13.2 # via cryptography
+cfn-lint==0.25.2 # via moto-ext
 chardet==3.0.4 # via requests
 click==7.0 # via flask
 colorama==0.4.1 # via awscli
 coverage==4.5.4 # via localstack, python-coveralls
-cryptography==2.7 # via moto-ext, pyopenssl, sshpubkeys
-decorator==4.4.0 # via jsonpath-rw
+cryptography==2.8 # via moto-ext, pyopenssl, sshpubkeys
+decorator==4.4.1 # via jsonpath-rw
 dnslib==0.9.10 # via localstack-ext
 dnspython==1.16.0 # via localstack, localstack-ext
 docker==4.1.0 # via moto-ext
 docopt==0.6.2 # via localstack
 docutils==0.15.2 # via awscli, botocore
-ecdsa==0.13.3 # via python-jose, sshpubkeys
+ecdsa==0.14.1 # via python-jose, sshpubkeys
 elasticsearch==6.4.0 # via localstack
 entrypoints==0.3 # via flake8
-flake8-quotes==2.1.0 # via localstack
-flake8==3.7.8 # via flake8-quotes, localstack
+flake8-quotes==2.1.1 # via localstack
+flake8==3.7.9 # via flake8-quotes, localstack
 flask-cors==3.0.3 # via localstack
 flask==1.0.2 # via flask-cors, flask-swagger, localstack
 flask_swagger==0.2.12 # via localstack
 forbiddenfruit==0.1.3 # via localstack
-future==0.17.1 # via aws-xray-sdk, python-jose
+future==0.18.2 # via aws-xray-sdk, python-jose
 idna==2.8 # via moto-ext, requests
 importlib-metadata==0.23 # via jsonschema
 itsdangerous==1.1.0 # via flask
 jinja2==2.10.3 # via flask, moto-ext
 jmespath==0.9.4 # via boto3, botocore

@@ -50,44 +50,44 @@ jsonpatch==1.24 # via cfn-lint
 jsonpath-rw==1.4.0 # via localstack
 jsonpickle==1.2 # via aws-xray-sdk
 jsonpointer==2.0 # via jsonpatch
-jsonschema==3.0.2 # via aws-sam-translator, cfn-lint
-localstack-client==0.14 # via localstack
-localstack-ext==0.10.41 # via localstack
-localstack[full]==0.10.4.2
+jsonschema==3.2.0 # via aws-sam-translator, cfn-lint
+localstack-client==0.15 # via localstack
+localstack-ext==0.10.66 # via localstack
+localstack[full]==0.10.5
 markupsafe==1.1.1 # via jinja2
 mccabe==0.6.1 # via flake8
 mock==3.0.5 # via amazon-kclpy-ext, moto-ext
-moto-ext==1.3.14.1 # via localstack
+moto-ext==1.3.14.2 # via localstack
 nose-timer==0.7.5 # via localstack
 nose==1.3.7 # via localstack, nose-timer
 ply==3.11 # via jsonpath-rw
 psutil==5.4.8 # via localstack
 pyaes==1.6.0 # via localstack-ext
-pyasn1==0.4.7 # via rsa
+pyasn1==0.4.8 # via rsa
 pycodestyle==2.5.0 # via autopep8, flake8
 pycparser==2.19 # via cffi
 pyflakes==2.1.1 # via flake8
-pympler==0.7 # via localstack
+pympler==0.8 # via localstack
 pyopenssl==17.5.0 # via localstack
-pyrsistent==0.15.4 # via jsonschema
+pyrsistent==0.15.5 # via jsonschema
 python-coveralls==2.9.3 # via localstack
 python-dateutil==2.8.0 # via botocore, moto-ext
 python-jose==3.0.1 # via moto-ext
 pytz==2019.3 # via moto-ext
 pyyaml==5.1 # via awscli, cfn-lint, flask-swagger, localstack, moto-ext, python-coveralls
 requests-aws4auth==0.9 # via localstack
-requests==2.22.0 # via docker, localstack, moto-ext, python-coveralls, requests-aws4auth, responses
+requests==2.22.0 # via docker, localstack, localstack-ext, moto-ext, python-coveralls, requests-aws4auth, responses
 responses==0.10.6 # via moto-ext
 rsa==3.4.2 # via awscli, python-jose
 s3transfer==0.2.1 # via awscli, boto3
-six==1.12.0 # via airspeed, aws-sam-translator, cfn-lint, cryptography, docker, flask-cors, jsonpath-rw, jsonschema, localstack, mock, moto-ext, pyopenssl, pyrsistent, python-coveralls, python-dateutil, python-jose, responses, websocket-client
+six==1.13.0 # via airspeed, aws-sam-translator, cfn-lint, cryptography, docker, ecdsa, flask-cors, jsonpath-rw, jsonschema, localstack, mock, moto-ext, pyopenssl, pyrsistent, python-coveralls, python-jose, responses, websocket-client
 sshpubkeys==3.1.0 # via moto-ext
 subprocess32==3.5.4 # via localstack
-urllib3==1.25.6 # via botocore, elasticsearch, requests
+urllib3==1.25.7 # via botocore, elasticsearch, requests
 websocket-client==0.56.0 # via docker
 werkzeug==0.16.0 # via flask, moto-ext
 wrapt==1.11.2 # via aws-xray-sdk
 xmltodict==0.12.0 # via localstack, moto-ext

 # The following packages are considered to be unsafe in a requirements file:
-# setuptools==41.4.0 # via cfn-lint, jsonschema
+# setuptools==41.6.0 # via jsonschema
@@ -23,7 +23,6 @@ s3fs
 selenium
 sentry-sdk
 setuptools
-six
 tabulate
 tblib
 tld
@@ -4,71 +4,61 @@
 #
 #    pip-compile requirements.in
 #
 atomicwrites==1.3.0 # via pytest
-attrs==19.2.0 # via pytest
+attrs==19.3.0 # via pytest
 backcall==0.1.0 # via ipython
 beautifulsoup4==4.8.1
-boto3==1.9.244
-botocore==1.12.244 # via boto3, s3fs, s3transfer
+boto3==1.10.25
+botocore==1.13.25 # via boto3, s3fs
 certifi==2019.9.11 # via sentry-sdk
-cython==0.29.13
-decorator==4.4.0 # via ipython, traitlets
-defusedxml==0.6.0 # via mini-amf
+cython==0.29.14
+decorator==4.4.1 # via ipython
 dill==0.3.1.1
 docutils==0.15.2 # via botocore
 entrypoints==0.3 # via flake8
 flake8-isort==2.7.0
-flake8==3.7.8
-fsspec==0.5.2 # via s3fs
+flake8==3.7.9
+fsspec==0.6.0 # via s3fs
 importlib-metadata==0.23 # via pluggy, pytest
 ipython-genutils==0.2.0 # via traitlets
-ipython==7.8.0
-isort==4.3.21 # via flake8-isort
+ipython==7.9.0
 jedi==0.15.1 # via ipython
 jmespath==0.9.4 # via boto3, botocore
 mccabe==0.6.1 # via flake8
 mini-amf==0.9.1
 mmh3==2.5.1
-more-itertools==7.2.0 # via pytest, zipp
+more-itertools==7.2.0 # via pytest
 multiprocess==0.70.9
-numpy==1.17.2
+numpy==1.17.4
 packaging==19.2 # via pytest
-pandas==0.25.1
-parso==0.5.1 # via jedi
+pandas==0.25.3
 pexpect==4.7.0 # via ipython
 pickleshare==0.7.5 # via ipython
-pillow==6.2.0
-pluggy==0.13.0 # via pytest
+pillow==6.2.1
+pluggy==0.13.1 # via pytest
 plyvel==1.1.0
 prompt-toolkit==2.0.10 # via ipython
 psutil==5.4.8
 ptyprocess==0.6.0 # via pexpect
 publicsuffix==1.1.0
 py==1.8.0 # via pytest
-pyarrow==0.15.0
-pyasn1==0.4.7
+pyarrow==0.15.1
+pyasn1==0.4.8
 pycodestyle==2.5.0 # via flake8
 pyflakes==2.1.1 # via flake8
 pygments==2.4.2 # via ipython
 pyparsing==2.4.2 # via packaging
-pytest==5.2.1
+pytest==5.3.0
 python-dateutil==2.8.0
 pytz==2019.3 # via pandas
-redis==3.3.8
-s3fs==0.3.5
+redis==3.3.11
+s3fs==0.4.0
 s3transfer==0.2.1 # via boto3
 selenium==3.141.0
-sentry-sdk==0.12.3
-six==1.12.0
-soupsieve==1.9.4 # via beautifulsoup4
-tabulate==0.8.5
-tblib==1.4.0
-testfixtures==6.10.0 # via flake8-isort
-tld==0.9.6
+sentry-sdk==0.13.2
+six==1.13.0 # via pyarrow, tld
+tabulate==0.8.6
+tblib==1.5.0
+tld==0.9.8
 traitlets==4.3.3 # via ipython
-urllib3==1.25.6 # via botocore, selenium, sentry-sdk
-wcwidth==0.1.7 # via prompt-toolkit, pytest
-zipp==0.6.0 # via importlib-metadata
+urllib3==1.25.7 # via botocore, sentry-sdk
+wcwidth==0.1.7 # via pytest

 # The following packages are considered to be unsafe in a requirements file:
-# setuptools==41.4.0
+# setuptools==41.6.0
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, print_function

 import os
 import subprocess

@@ -1,5 +1,4 @@
 """ Contains lists of expected data and or rows for tests """
-from __future__ import absolute_import

 from .utilities import BASE_TEST_URL, BASE_TEST_URL_DOMAIN

@@ -1,4 +1,3 @@
-from __future__ import absolute_import, print_function

 import atexit
 import subprocess

@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import os
 from os.path import isfile, join

@@ -1,10 +1,9 @@
-from __future__ import absolute_import

 import os
 import tarfile
+from urllib.parse import urlparse

 import pytest
-from six.moves.urllib.parse import urlparse

 from ..automation import TaskManager
 from ..automation.utilities import db_utils, domain_utils
@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 from ..automation import CommandSequence, TaskManager
 from ..automation.utilities import db_utils

@@ -1,4 +1,3 @@
-from __future__ import absolute_import, print_function

 from os.path import dirname, isfile, realpath

@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import os
 from datetime import datetime
@@ -1,17 +1,14 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import

 import base64
 import json
 import os
 from hashlib import sha256
 from time import sleep
+from urllib.parse import urlparse

 import pytest
-import six
-from six.moves import range
-from six.moves.urllib.parse import urlparse

 from ..automation import CommandSequence, TaskManager
 from ..automation.utilities import db_utils

@@ -830,7 +827,7 @@ class TestPOSTInstrument(OpenWPMTest):
         encoding_type = "text/plain"
         db = self.visit('/post_request.html?encoding_type=' + encoding_type)
         post_body = self.get_post_request_body_from_db(db, True)
-        if not isinstance(self.post_data_multiline_raw, six.text_type):
+        if not isinstance(self.post_data_multiline_raw, str):
             expected = self.post_data_multiline_raw.decode('utf-8')
         else:
             expected = self.post_data_multiline_raw

@@ -880,7 +877,7 @@ class TestPOSTInstrument(OpenWPMTest):
         db = self.visit("/post_request_ajax.html?format=" + post_format)
         post_body = self.get_post_request_body_from_db(db, True)
         # Binary strings get put into the database as-if they were latin-1.
-        assert six.binary_type(bytearray(range(100))) == post_body
+        assert bytes(bytearray(range(100))) == post_body

     @pytest.mark.skip(reason="Firefox is currently not able to return the "
                       "file content for an upload, only the filename")
@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import re

@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import re

@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import re

@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import re

@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import logging
 import os

@@ -67,6 +66,16 @@ def child_proc_with_exception(index):
     )


+def child_proc_logging_exception():
+    logger = logging.getLogger('openwpm')
+    try:
+        raise Exception("This is my generic Test Exception")
+    except Exception:
+        logger.error(
+            "I'm logging an exception", exc_info=True,
+        )
+
+
 class TestMPLogger(OpenWPMTest):

     def get_logfile_path(self, directory):

@@ -163,3 +172,14 @@ class TestMPLogger(OpenWPMTest):
         assert(log_content.count(CHILD_INFO_STR_1 % child) == 1)
         assert(log_content.count(CHILD_INFO_STR_2 % child) == 1)
         assert(log_content.count(CHILD_EXCEPTION_STR % child) == 1)
+
+    def test_child_process_logging(self, tmpdir):
+        log_file = self.get_logfile_path(str(tmpdir))
+        openwpm_logger = MPLogger.MPLogger(log_file)
+        child_process = Process(target=child_proc_logging_exception())
+        child_process.daemon = True
+        child_process.start()
+        openwpm_logger.close()
+        child_process.join()
+        log_content = self.get_logfile_contents(log_file)
+        assert ("I'm logging an exception" in log_content)
Some file diffs are hidden because one or more lines are too long.
@@ -0,0 +1,42 @@
+<!-- sample code derived from https://www.browserleaks.com/canvas#how-does-it-work -->
+<html>
+<title>Large Payloads Test Page</title>
+<h1>Large Payloads Test Page</h1>
+<h3>Yields large payloads within the JS instrumentation</h3>
+
+<canvas id="myCanvas"></canvas>
+<pre id="canvasDataURL"></pre>
+<img id="canvasDataURLImage" src="about:blank"/>
+
+<script>
+console.log("Before", new Date().toISOString());
+
+const str200kb = "01234567890".repeat(100 * 200);
+
+// Text with lowercase/uppercase/punctuation symbols and a large string
+var txt = "BrowserLeaks,com <canvas> 1.0" + str200kb;
+
+var canvas = document.getElementById('myCanvas');
+var ctx = canvas.getContext("2d");
+ctx.textBaseline = "top";
+// The most common type
+ctx.font = "14px 'Arial'";
+ctx.textBaseline = "alphabetic";
+ctx.fillStyle = "#f60";
+ctx.fillRect(125,1,62,20);
+// Some tricks for color mixing to increase the difference in rendering
+ctx.fillStyle = "#069";
+ctx.fillText(txt, 2, 15);
+ctx.fillStyle = "rgba(102, 204, 0, 0.7)";
+ctx.fillText(txt, 4, 17);
+ctx.fillStyle = str200kb;
+ctx.fillText(txt, 6, 18);
+var pre = document.getElementById("canvasDataURL");
+pre.innerHTML = canvas.toDataURL();
+var img = document.getElementById("canvasDataURLImage");
+img.src = canvas.toDataURL();
+
+console.log("After", new Date().toISOString());
+</script>
+
+</html>
@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 from os.path import isfile, join
@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import json
 import time

@@ -6,7 +5,6 @@ from collections import defaultdict

 import boto3
 import pytest
-import six
 from localstack.services import infra

 from ..automation import TaskManager

@@ -72,6 +70,8 @@ class TestS3Aggregator(OpenWPMTest):
             table = dataset.load_table(table_name)
             visit_ids[table_name] = table.visit_id.unique()
             assert len(visit_ids[table_name]) == NUM_VISITS * NUM_BROWSERS
+            for vid in visit_ids[table_name]:
+                assert(vid >= 0) and (vid < (1 << 53))
         for table_name, ids in visit_ids.items():
             assert set(ids) == set(visit_ids['site_visits'])

@@ -83,7 +83,7 @@ class TestS3Aggregator(OpenWPMTest):
         # of configuration files
         config_file = dataset.list_files('config', prepend_root=True)
         assert len(config_file) == 1  # only one instance started in test
-        config = json.loads(six.text_type(
+        config = json.loads(str(
             dataset.get_file(config_file[0]), 'utf-8'))
         assert len(config['browser_params']) == NUM_BROWSERS
@@ -1,13 +1,12 @@
-from __future__ import absolute_import

 import glob
 import gzip
 import json
 import os
 import re
+from urllib.parse import urlparse

 from PIL import Image
-from six.moves.urllib.parse import urlparse

 from ..automation import CommandSequence, TaskManager
 from ..automation.utilities import db_utils

@@ -152,9 +151,9 @@ class TestSimpleCommands(OpenWPMTest):
         manager = TaskManager.TaskManager(manager_params, browser_params)

         # Set up two sequential browse commands to two URLS
-        cs_a = CommandSequence.CommandSequence(url_a)
+        cs_a = CommandSequence.CommandSequence(url_a, site_rank=0)
         cs_a.browse(num_links=1, sleep=1)
-        cs_b = CommandSequence.CommandSequence(url_b)
+        cs_b = CommandSequence.CommandSequence(url_b, site_rank=1)
         cs_b.browse(num_links=1, sleep=1)

         manager.execute_command_sequence(cs_a)

@@ -162,13 +161,16 @@ class TestSimpleCommands(OpenWPMTest):
         manager.close()

         qry_res = db_utils.query_db(manager_params['db'],
-                                    "SELECT site_url FROM site_visits")
+                                    "SELECT site_url, site_rank"
+                                    " FROM site_visits")

         # We had two separate page visits
         assert len(qry_res) == 2

         assert qry_res[0][0] == url_a
+        assert qry_res[0][1] == 0
         assert qry_res[1][0] == url_b
+        assert qry_res[1][1] == 1

     def test_browse_http_table_valid(self):
         """Check CommandSequence.browse() works and populates http tables correctly.

@@ -1,4 +1,3 @@
-from __future__ import absolute_import

 import pytest
@@ -1,18 +1,17 @@
-from __future__ import absolute_import, print_function
-
 import os
+import socketserver
 import threading
+from http.server import SimpleHTTPRequestHandler
 from os.path import dirname, realpath
 from random import choice
+from urllib.parse import parse_qs, urlparse

 import boto3
 import pyarrow.parquet as pq
 import s3fs
 from botocore.credentials import Credentials
 from pyarrow.filesystem import S3FSWrapper  # noqa
-from six.moves import range, socketserver
-from six.moves.SimpleHTTPServer import SimpleHTTPRequestHandler
-from six.moves.urllib.parse import parse_qs, urlparse

 LOCAL_WEBSERVER_PORT = 8000
 BASE_TEST_URL_DOMAIN = "localtest.me"