Merge branch 'master' of github.com:citp/OpenWPM

This commit is contained in:
englehardt 2016-10-19 16:05:55 -04:00
Родитель b48056b763 002a9b9d71
Коммит 3a14416c57
10 изменённых файлов: 137 добавлений и 24 удалений

Просмотреть файл

@ -1,4 +1,4 @@
OpenWPM [![Build Status](https://travis-ci.org/citp/OpenWPM.svg)](https://travis-ci.org/citp/OpenWPM)
OpenWPM [![Build Status](https://travis-ci.org/citp/OpenWPM.svg?branch=master)](https://travis-ci.org/citp/OpenWPM)
=======
OpenWPM is a web privacy measurement framework which makes it easy to collect
@ -24,7 +24,7 @@ us know!
Quick Start
-----------
Once installed, it's very easy to run a quick test of OpenWPM. Check out
Once installed, it is very easy to run a quick test of OpenWPM. Check out
`demo.py` for an example. This will use the default settings specified in
`automation/default_manager_params.json` and
`automation/default_browser_params.json`, with the exception of the changes

Просмотреть файл

@ -38,7 +38,8 @@ class CommandSequence:
self.contains_get_or_browse = True
def dump_flash_cookies(self, timeout=60):
""" dumps the local storage vectors (flash, localStorage, cookies) to db """
""" dumps the local storage vectors (flash, localStorage, cookies) to db
Side effect: closes the current tab."""
self.total_timeout += timeout
if not self.contains_get_or_browse:
raise CommandExecutionError("No get or browse request preceding "
@ -47,7 +48,8 @@ class CommandSequence:
self.commands_with_timeout.append((command, timeout))
def dump_profile_cookies(self, timeout=60):
""" dumps from the profile path to a given file (absolute path) """
""" dumps from the profile path to a given file (absolute path)
Side effect: closes the current tab."""
self.total_timeout += timeout
if not self.contains_get_or_browse:
raise CommandExecutionError("No get or browse request preceding "
@ -69,3 +71,23 @@ class CommandSequence:
"the dump storage vectors command", self)
command = ('EXTRACT_LINKS',)
self.commands_with_timeout.append((command, timeout))
def save_screenshot(self, screenshot_name, timeout=30):
    """Queue a command that saves a screenshot of the current page.

    The screenshot goes to the 'screenshots' directory in the data
    directory. `timeout` is added to the sequence's total timeout and is
    stored alongside the queued command.

    Raises CommandExecutionError if no get or browse command precedes
    this one in the sequence.
    """
    # The timeout is accounted for before the precondition check, so it is
    # added even when the check below raises.
    self.total_timeout += timeout
    if self.contains_get_or_browse:
        screenshot_command = ('SAVE_SCREENSHOT', screenshot_name,)
        self.commands_with_timeout.append((screenshot_command, timeout))
        return
    raise CommandExecutionError("No get or browse request preceding "
                                "the save screenshot command", self)
def dump_page_source(self, dump_name, timeout=30):
    """Queue a command that dumps the rendered source of the current page.

    The source goes to the 'sources' directory. `timeout` is added to the
    sequence's total timeout and is stored alongside the queued command.

    Raises CommandExecutionError if no get or browse command precedes
    this one in the sequence.
    """
    # The timeout is accounted for before the precondition check, so it is
    # added even when the check below raises.
    self.total_timeout += timeout
    if self.contains_get_or_browse:
        dump_command = ('DUMP_PAGE_SOURCE', dump_name,)
        self.commands_with_timeout.append((dump_command, timeout))
        return
    raise CommandExecutionError("No get or browse request preceding "
                                "the dump page source command", self)

Просмотреть файл

@ -4,6 +4,7 @@ from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import MoveTargetOutOfBoundsException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
import os
import random
import time
@ -225,3 +226,10 @@ def dump_profile_cookies(start_time, visit_id, webdriver, browser_params, manage
# Close connection to db
sock.close()
def save_screenshot(screenshot_name, webdriver, browser_params, manager_params):
    """Save a screenshot via the webdriver as '<screenshot_name>.png'.

    The file is placed in manager_params['screenshot_path'].
    browser_params is accepted but not used here.
    """
    image_file = screenshot_name + '.png'
    image_path = os.path.join(manager_params['screenshot_path'], image_file)
    webdriver.save_screenshot(image_path)
def dump_page_source(dump_name, webdriver, browser_params, manager_params):
    """Write the webdriver's current page source to '<dump_name>.html'.

    The file is created in manager_params['source_dump_path'] and holds
    webdriver.page_source encoded as UTF-8, followed by a single newline.
    browser_params is accepted but not used here.
    """
    dump_path = os.path.join(manager_params['source_dump_path'],
                             dump_name + '.html')
    with open(dump_path, 'wb') as f:
        # The trailing newline must be a bytes literal: adding a text '\n'
        # to the encoded source raises TypeError on Python 3. The bytes
        # written on Python 2 are unchanged.
        f.write(webdriver.page_source.encode('utf8') + b'\n')

Просмотреть файл

@ -40,3 +40,11 @@ def execute_command(command, webdriver, proxy_queue, browser_settings, browser_p
if command[0] == 'EXTRACT_LINKS':
browser_commands.extract_links(webdriver, browser_params, manager_params)
if command[0] == 'SAVE_SCREENSHOT':
browser_commands.save_screenshot(screenshot_name=command[1], webdriver=webdriver,
browser_params=browser_params, manager_params=manager_params)
if command[0] == 'DUMP_PAGE_SOURCE':
browser_commands.dump_page_source(dump_name=command[1], webdriver=webdriver,
browser_params=browser_params, manager_params=manager_params)

Просмотреть файл

@ -60,8 +60,16 @@ class TaskManager:
manager_params[path] = os.path.expanduser(manager_params[path])
manager_params['database_name'] = os.path.join(manager_params['data_directory'],manager_params['database_name'])
manager_params['log_file'] = os.path.join(manager_params['log_directory'],manager_params['log_file'])
manager_params['screenshot_path'] = os.path.join(manager_params['data_directory'], 'screenshots')
manager_params['source_dump_path'] = os.path.join(manager_params['data_directory'], 'sources')
self.manager_params = manager_params
# Create data directories if they do not exist
if not os.path.exists(manager_params['screenshot_path']):
os.makedirs(manager_params['screenshot_path'])
if not os.path.exists(manager_params['source_dump_path']):
os.makedirs(manager_params['source_dump_path'])
# check size of parameter dictionary
self.num_browsers = manager_params['num_browsers']
if len(browser_params) != self.num_browsers:

Просмотреть файл

@ -17,7 +17,11 @@ fi
sudo apt-get update
sudo apt-get install -y firefox htop git python-dev libxml2-dev libxslt-dev libffi-dev libssl-dev build-essential xvfb libboost-python-dev libleveldb1 libleveldb-dev libjpeg-dev
sudo apt-get install -y firefox htop git python-dev libxml2-dev libxslt-dev libffi-dev libssl-dev build-essential xvfb libboost-python-dev libleveldb-dev libjpeg-dev
# On some versions of Ubuntu the package libleveldb1v5 isn't available; fall back to libleveldb1.
sudo apt-get install -y libleveldb1v5 || sudo apt-get install -y libleveldb1
if [ "$flash" = true ]; then
sudo apt-get install -y adobe-flashplugin
fi
@ -34,5 +38,6 @@ fi
# Install specific version of Firefox known to work well with the selenium version above
wget https://ftp.mozilla.org/pub/firefox/releases/45.0.1/linux-x86_64/en-US/firefox-45.0.1.tar.bz2
tar jxf firefox*.tar.bz2
rm -rf firefox-bin
mv firefox firefox-bin
rm firefox*.tar.bz2

Просмотреть файл

@ -125,22 +125,22 @@ set_property = [(SET_PROP_TEST_PAGE,
# AudioContext and AudioNode symbols we expect from our test script
audio = {
"AudioContext.createOscillator",
"AudioContext.createAnalyser",
"AudioContext.createGain",
"AudioContext.createScriptProcessor",
"GainNode.gain",
"OscillatorNode.type",
"OscillatorNode.connect",
"AnalyserNode.connect",
"ScriptProcessorNode.connect",
"AudioContext.destination",
"GainNode.connect",
"ScriptProcessorNode.onaudioprocess",
"OscillatorNode.start",
"AnalyserNode.frequencyBinCount",
"AnalyserNode.getFloatFrequencyData",
"AnalyserNode.disconnect",
"ScriptProcessorNode.disconnect",
"GainNode.disconnect",
"OscillatorNode.stop"}
u"AudioContext.createOscillator",
u"AudioContext.createAnalyser",
u"AudioContext.createGain",
u"AudioContext.createScriptProcessor",
u"GainNode.gain",
u"OscillatorNode.type",
u"OscillatorNode.connect",
u"AnalyserNode.connect",
u"ScriptProcessorNode.connect",
u"AudioContext.destination",
u"GainNode.connect",
u"ScriptProcessorNode.onaudioprocess",
u"OscillatorNode.start",
u"AnalyserNode.frequencyBinCount",
u"AnalyserNode.getFloatFrequencyData",
u"AnalyserNode.disconnect",
u"ScriptProcessorNode.disconnect",
u"GainNode.disconnect",
u"OscillatorNode.stop"}

Просмотреть файл

@ -96,6 +96,7 @@ class TestExtension(OpenWPMTest):
observed_rows.add(item)
assert set(expected.webrtc_calls) == observed_rows
@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", reason='Flaky on Travis CI')
def test_audio_fingerprinting(self, tmpdir):
db = self.visit('/audio_fingerprinting.html', str(tmpdir))
# Check that all calls and methods are recorded

Просмотреть файл

@ -0,0 +1,18 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><head>
<title>Simple Page A</title>
<script type="application/javascript">
function set_cookie() {
document.cookie = 'test_cookie=Test-Page-A; expires=Tue, 31 Dec 2030 00:00:00 UTC; path=/';
console.log(window.navigator.userAgent);
}
</script>
</head>
<body onload="set_cookie()">
<a href="http://localtest.me:8000/test_pages/simple_c.html">Click me!</a>
<a href="simple_d.html">Click me also!</a>
<a href="javascript:alert(1)">Click me for a JS alert!</a>
<a href="https://www.google.com">Go to google.com</a>
<a href="http://example.com/test.html?localtest.me">Go to example.com</a>
</body></html>

Просмотреть файл

@ -1,4 +1,7 @@
import pytest # noqa
from PIL import Image
import filecmp
import os
import utilities
from ..automation import CommandSequence
@ -9,6 +12,8 @@ url_b = utilities.BASE_TEST_URL + '/simple_b.html'
url_c = utilities.BASE_TEST_URL + '/simple_c.html'
url_d = utilities.BASE_TEST_URL + '/simple_d.html'
rendered_js_url = utilities.BASE_TEST_URL + '/property_enumeration.html'
class TestSimpleCommands():
"""Test correctness of simple commands and check
that resulting data is properly keyed.
@ -191,3 +196,41 @@ class TestSimpleCommands():
assert qry_res[0][0] == 4
def test_save_screenshot_valid(self, tmpdir):
    """Check that 'save_screenshot' produces a non-blank screenshot file."""
    # Crawl url_a and queue a screenshot of the loaded page
    data_dir = str(tmpdir)
    manager_params, browser_params = self.get_config(data_dir)
    manager = TaskManager.TaskManager(manager_params, browser_params)
    sequence = CommandSequence.CommandSequence(url_a)
    sequence.get(sleep=1)
    sequence.save_screenshot('test_screenshot')
    manager.execute_command_sequence(sequence)
    manager.close(post_process=False)

    # The image counts as blank only if every band's extrema are (255, 255),
    # so a single band with any other extrema is enough to pass.
    screenshot_path = os.path.join(data_dir, 'screenshots',
                                   'test_screenshot.png')
    screenshot = Image.open(screenshot_path)
    assert any(band.getextrema() != (255, 255)
               for band in screenshot.split())
def test_dump_page_source_valid(self, tmpdir):
    """Check that 'dump_page_source' writes the expected page source."""
    # Crawl url_a and queue a dump of the rendered source
    data_dir = str(tmpdir)
    manager_params, browser_params = self.get_config(data_dir)
    manager = TaskManager.TaskManager(manager_params, browser_params)
    sequence = CommandSequence.CommandSequence(url_a)
    sequence.get(sleep=1)
    sequence.dump_page_source('test_source')
    manager.execute_command_sequence(sequence)
    manager.close(post_process=False)

    # Compare the dumped bytes against the stored reference copy.
    dumped_path = os.path.join(data_dir, 'sources', 'test_source.html')
    with open(dumped_path, 'rb') as dumped_file:
        actual_source = dumped_file.read()
    # NOTE(review): this path is relative to the current working directory,
    # so the test presumably must be run from the directory that contains
    # 'test_pages' - confirm against the test runner setup.
    with open('./test_pages/expected_source.html', 'rb') as expected_file:
        expected_source = expected_file.read()
    assert actual_source == expected_source