From 776dd154c4f6de619866f3ebe9c0b05075896bfc Mon Sep 17 00:00:00 2001
From: Nick Hurley
Date: Thu, 14 Mar 2013 15:33:17 -0700
Subject: [PATCH] Update wpr installation

This is being updated because the old one doesn't work on a modern mac
(which is where I do the page recording for page load tests), so we need
to have a more modern one. This required a bit of change to srnamed, but
actually made the code wildly simpler (woohoo!)
---
 srnamed.py                                    |  45 +-
 wpr/PKG-INFO                                  |  10 +
 wpr/cachemissarchive.py                       | 260 +++++++
 wpr/cachemissarchive_test.py                  | 123 ++++
 wpr/customhandlers.py                         | 157 +++--
 wpr/daemonserver.py                           |   0
 wpr/deterministic.js                          |  42 ++
 wpr/dnsproxy.py                               | 141 ++--
 wpr/httparchive.py                            | 649 +++++++++++++-----
 wpr/httparchive_test.py                       | 347 ++++++++++
 wpr/httpclient.py                             | 325 +++++++--
 wpr/httpproxy.py                              | 115 ++--
 wpr/httpzlib.py                               |   0
 wpr/mock-archive.txt                          |  10 +
 wpr/mockhttprequest.py                        |  59 ++
 wpr/perftracker/__init__.py                   |   0
 wpr/perftracker/app/appengine_config.py       |  14 +
 wpr/perftracker/extension/background.html     |   2 +
 wpr/perftracker/runner.py                     | 115 +++-
 wpr/persistentmixin.py                        |   0
 wpr/platformsettings.py                       | 373 +++++++---
 wpr/platformsettings_test.py                  | 245 +++++++
 wpr/replay.py                                 | 418 +++++++----
 wpr/replayspdyserver.py                       |  63 +-
 wpr/servermanager.py                          | 131 ++++
 wpr/setup.cfg                                 |   5 +
 wpr/setup.py                                  |  57 ++
 wpr/third_party/dns/README.web-page-replay    |  12 +
 wpr/third_party/ipaddr/README.web-page-replay |  12 +
 wpr/third_party/ipaddr/ipaddr.py              | 446 ++++++------
 wpr/third_party/ipaddr/ipaddr_test.py         | 164 +++--
 wpr/third_party/ipaddr/test-2to3.sh           |  16 +-
 wpr/third_party/ipfw_win32/LICENSE            |  25 +
 .../ipfw_win32/README.web-page-replay         |  12 +
 wpr/third_party/nbhttp/README.web-page-replay |  21 +-
 wpr/third_party/nbhttp/__init__.py            |  22 +
 wpr/third_party/nbhttp/client.py              |  43 +-
 wpr/third_party/nbhttp/push_tcp.py            |  43 +-
 wpr/third_party/nbhttp/server.py              |  43 +-
 wpr/third_party/nbhttp/spdy_client.py         |  43 +-
 wpr/third_party/nbhttp/spdy_common.py         |  12 +-
 wpr/third_party/nbhttp/spdy_server.py         |  43 +-
 wpr/trafficshaper.py                          |  82 ++-
 wpr/trafficshaper_test.py                     |  22 +-
 wpr/util.py                                   |  47 ++
 wpr/webpagereplay.egg-info/PKG-INFO           |  10 +
 wpr/webpagereplay.egg-info/SOURCES.txt        | 209 ++++++
 .../dependency_links.txt                      |   1 +
 wpr/webpagereplay.egg-info/entry_points.txt   |   4 +
 wpr/webpagereplay.egg-info/requires.txt       |   1 +
 wpr/webpagereplay.egg-info/top_level.txt      |   3 +
 51 files changed, 3868 insertions(+), 1174 deletions(-)
 create mode 100644 wpr/PKG-INFO
 create mode 100755 wpr/cachemissarchive.py
 create mode 100755 wpr/cachemissarchive_test.py
 mode change 100755 => 100644 wpr/daemonserver.py
 create mode 100644 wpr/deterministic.js
 mode change 100755 => 100644 wpr/dnsproxy.py
 create mode 100755 wpr/httparchive_test.py
 mode change 100755 => 100644 wpr/httpclient.py
 mode change 100755 => 100644 wpr/httpproxy.py
 mode change 100755 => 100644 wpr/httpzlib.py
 create mode 100644 wpr/mock-archive.txt
 create mode 100644 wpr/mockhttprequest.py
 create mode 100644 wpr/perftracker/__init__.py
 mode change 100755 => 100644 wpr/persistentmixin.py
 mode change 100755 => 100644 wpr/platformsettings.py
 create mode 100755 wpr/platformsettings_test.py
 create mode 100644 wpr/servermanager.py
 create mode 100644 wpr/setup.cfg
 create mode 100644 wpr/setup.py
 create mode 100644 wpr/third_party/dns/README.web-page-replay
 create mode 100644 wpr/third_party/ipaddr/README.web-page-replay
 create mode 100644 wpr/third_party/ipfw_win32/LICENSE
 create mode 100644 wpr/third_party/ipfw_win32/README.web-page-replay
 mode change 100755 => 100644 wpr/trafficshaper.py
 create mode 100644 wpr/util.py
 create mode 100644 wpr/webpagereplay.egg-info/PKG-INFO
 create mode 100644 wpr/webpagereplay.egg-info/SOURCES.txt
 create mode 100644 wpr/webpagereplay.egg-info/dependency_links.txt
 create mode 100644 wpr/webpagereplay.egg-info/entry_points.txt
 create mode 100644 wpr/webpagereplay.egg-info/requires.txt
 create mode 100644 wpr/webpagereplay.egg-info/top_level.txt

diff --git a/srnamed.py b/srnamed.py
index 370182f..60dcc09 100644
--- a/srnamed.py
+++ b/srnamed.py
@@ -3,13 +3,13 @@
 import socket
 import sys
 import time
 
-from dnsproxy import DnsProxyServer, UdpDnsHandler, DnsProxyException
+from dnsproxy import DnsProxyServer, DnsProxyException
 
 import stoneridge
 
 listen_ip = None
-
+dnssrv = None
 
 IGNORE_HOSTS = (
     'puppet1.private.scl3.mozilla.com.',
@@ -25,54 +25,31 @@ SR_HOSTS = {
 }
 
 
-class NeckoDnsHandler(UdpDnsHandler):
-    def handle(self):
-        self.data = self.rfile.read()
-        self.transaction_id = self.data[0]
-        self.flags = self.data[1]
-        self.qa_counts = self.data[4:6]
-        self.domain = ''
-        operation_code = (ord(self.data[2]) >> 3) & 15
-        if operation_code == self.STANDARD_QUERY_OPERATION_CODE:
-            self.wire_domain = self.data[12:]
-            self.domain = self._domain(self.wire_domain)
-        else:
-            logging.debug("DNS request with non-zero operation code: %s",
-                          operation_code)
-        real_ip = self.server.passthrough_filter(self.domain)
-        if real_ip:
-            message = 'passthrough'
-            ip = real_ip
-        else:
-            message = 'handle'
-            ip = listen_ip
-        logging.debug('dnsproxy: %s(%s) -> %s', message, self.domain, ip)
-        self.reply(self.get_dns_reply(ip))
-
-
-def necko_passthrough(host):
-    logging.debug('passthrough: checking %s' % (host,))
+def srlookup(host):
+    logging.debug('srlookup: checking %s' % (host,))
     if host in IGNORE_HOSTS:
         logging.debug('attempting to ignore %s' % (host,))
         try:
             return socket.gethostbyname(host)
         except:
-            logging.error('Could not get actual IP for %s, faking it!' %
-                          (host,))
+            logging.error('Could not get actual IP for %s' % (host,))
+            # This should result in NXDOMAIN
+            return None
 
     if host in SR_HOSTS:
         logging.debug('stone ridge host detected: %s' % (host,))
         return SR_HOSTS[host]
 
     logging.debug('host not found in our exception lists')
-    return None
+
+    return dnssrv.server_address[0]
 
 
 def daemon():
+    global dnssrv
     logging.debug('about to start proxy server')
     try:
-        with(DnsProxyServer(False, handler=NeckoDnsHandler,
-                            passthrough_filter=necko_passthrough)):
+        with DnsProxyServer(srlookup, listen_ip) as dnssrv:
             logging.debug('proxy server started')
             while True:
                 time.sleep(1)
diff --git a/wpr/PKG-INFO b/wpr/PKG-INFO
new file mode 100644
index 0000000..9863d0b
--- /dev/null
+++ b/wpr/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: webpagereplay
+Version: 1.1.2
+Summary: Record and replay web content
+Home-page: http://code.google.com/p/web-page-replay/
+Author: Web Page Replay Project Authors
+Author-email: web-page-replay-dev@googlegroups.com
+License: Apache License 2.0
+Description: UNKNOWN
+Platform: UNKNOWN
diff --git a/wpr/cachemissarchive.py b/wpr/cachemissarchive.py
new file mode 100755
index 0000000..e54880f
--- /dev/null
+++ b/wpr/cachemissarchive.py
@@ -0,0 +1,260 @@
+#!/usr/bin/env python
+# Copyright 2011 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Create and view cache miss archives. + +Usage: +./cachemissarchive.py + +This will print out some statistics of the cache archive. +""" + +import logging +import os +import sys +from perftracker import runner_cfg +import persistentmixin + + +def format_request(request, join_val=' ', use_path=True, + use_request_body=False, headers=False): + if use_path: + request_parts = [request.command, request.host + request.path] + else: + request_parts = [request.command, request.host] + if use_request_body: + request_parts.append(request.request_body) + if headers: + request_parts.append(request.headers) + return join_val.join([str(x) for x in request_parts]) + + +class CacheMissArchive(persistentmixin.PersistentMixin): + """Archives cache misses from playback mode. + + Uses runner_cfg.urls for tracking the current page url. + + Attributes: + archive_file: output file to store cache miss data + current_page_url: any cache misses will be marked as caused by this URL + page_urls: the list of urls to record and keep track of + archive: dict of cache misses, where the key is a page URL and + the value is a list of ArchivedHttpRequest objects + request_counts: dict that records the number of times a request is issued in + both record and replay mode + """ + + def __init__(self, archive_file): + """Initialize CacheMissArchive. + + Args: + archive_file: output file to store data + """ + self.archive_file = archive_file + self.current_page_url = None + + # TODO: Pass in urls to CacheMissArchive without runner_cfg dependency + if runner_cfg.urls: + self.page_urls = runner_cfg.urls + + # { URL: [archived_http_request, ...], ... } + self.archive = {} + + # { archived_http_request: (num_record_requests, num_replay_requests), ... } + self.request_counts = {} + + def record_cache_miss(self, request, page_url=None): + """Records a cache miss for given request. + + Args: + request: instance of ArchivedHttpRequest that causes a cache miss + page_url: specify the referer URL that caused this cache miss + """ + if not page_url: + page_url = self.current_page_url + logging.debug('Cache miss on %s', request) + self._append_archive(page_url, request) + + def set_urls_list(self, urls): + self.page_urls = urls + + def record_request(self, request, is_record_mode, is_cache_miss=False): + """Records the request into the cache archive. + + Should be updated on every HTTP request. + + Also updates the current page_url contained in runner_cfg.urls. + + Args: + request: instance of ArchivedHttpRequest + is_record_mode: indicates whether WPR is on record mode + is_cache_miss: if True, records the request as a cache miss + """ + self._record_request(request, is_record_mode) + + page_url = request.host + request.path + + for url in self.page_urls: + if self._match_urls(page_url, url): + self.current_page_url = url + logging.debug('Updated current url to %s', self.current_page_url) + break + + if is_cache_miss: + self.record_cache_miss(request) + + def _record_request(self, request, is_record_mode): + """Adds 1 to the appropriate request count. 
+ + Args: + request: instance of ArchivedHttpRequest + is_record_mode: indicates whether WPR is on record mode + """ + num_record, num_replay = self.request_counts.get(request, (0, 0)) + if is_record_mode: + num_record += 1 + else: + num_replay += 1 + self.request_counts[request] = (num_record, num_replay) + + def request_diff(self, is_show_all=False): + """Calculates if there are requests sent in record mode that are + not sent in replay mode and vice versa. + + Args: + is_show_all: If True, only includes instance where the number of requests + issued in record/replay mode differs. If False, includes all instances. + Returns: + A string displaying difference in requests between record and replay modes + """ + str_list = ['Diff of requests sent in record mode versus replay mode\n'] + less = [] + equal = [] + more = [] + + for request, (num_record, num_replay) in self.request_counts.items(): + format_req = format_request(request, join_val=' ', + use_path=True, use_request_body=False) + request_line = '%s record: %d, replay: %d' % ( + format_req, num_record, num_replay) + if num_record < num_replay: + less.append(request_line) + elif num_record == num_replay: + equal.append(request_line) + else: + more.append(request_line) + + if is_show_all: + str_list.extend(sorted(equal)) + + str_list.append('') + str_list.extend(sorted(less)) + str_list.append('') + str_list.extend(sorted(more)) + + return '\n'.join(str_list) + + def _match_urls(self, url_1, url_2): + """Returns true if urls match. + + Args: + url_1: url string (e.g. 'http://www.cnn.com') + url_2: same as url_1 + Returns: + True if the two urls match, false otherwise + """ + scheme = 'http://' + if url_1.startswith(scheme): + url_1 = url_1[len(scheme):] + if url_2.startswith(scheme): + url_2 = url_2[len(scheme):] + return url_1 == url_2 + + def _append_archive(self, page_url, request): + """Appends the corresponding (page_url,request) pair to archived dictionary. + + Args: + page_url: page_url string (e.g. 'http://www.cnn.com') + request: instance of ArchivedHttpRequest + """ + self.archive.setdefault(page_url, []) + self.archive[page_url].append(request) + + def __repr__(self): + return repr((self.archive_file, self.archive)) + + def Persist(self): + self.current_page_url = None + persistentmixin.PersistentMixin.Persist(self, self.archive_file) + + def get_total_referers(self): + return len(self.archive) + + def get_total_cache_misses(self): + count = 0 + for k in self.archive: + count += len(self.archive[k]) + return count + + def get_total_referer_cache_misses(self): + count = 0 + if self.page_urls: + count = sum(len(v) for k, v in self.archive.items() + if k in self.page_urls) + return count + + def get_cache_misses(self, page_url, join_val=' ', + use_path=False, use_request_body=False): + """Returns a list of cache miss requests from the page_url. + + Args: + page_url: url of the request (e.g. 
http://www.zappos.com/) + join_val: value to join output string with + use_path: true if path is to be included in output display + use_request_body: true if request_body is to be included in output display + Returns: + A list of cache miss requests (in textual representation) from page_url + """ + misses = [] + if page_url in self.archive: + cache_misses = self.archive[page_url] + for k in cache_misses: + misses.append(format_request(k, join_val, use_path, use_request_body)) + return misses + + def get_all_cache_misses(self, use_path=False): + """Format cache misses into concise visualization.""" + all_cache_misses = '' + for page_url in self.archive: + misses = self.get_cache_misses(page_url, use_path=use_path) + all_cache_misses = '%s%s --->\n %s\n\n' % ( + all_cache_misses, page_url, '\n '.join(misses)) + return all_cache_misses + + +if __name__ == '__main__': + archive_file = sys.argv[1] + cache_archive = CacheMissArchive.Load(archive_file) + + print 'Total cache misses: %d' % cache_archive.get_total_cache_misses() + print 'Total page_urls cache misses: %d' % ( + cache_archive.get_total_referer_cache_misses()) + print 'Total referers: %d\n' % cache_archive.get_total_referers() + print 'Referers are:' + for ref in cache_archive.archive: + print '%s with %d cache misses' % (ref, len(cache_archive.archive[ref])) + print + print cache_archive.get_all_cache_misses(use_path=True) + print diff --git a/wpr/cachemissarchive_test.py b/wpr/cachemissarchive_test.py new file mode 100755 index 0000000..c624e9d --- /dev/null +++ b/wpr/cachemissarchive_test.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# Copyright 2011 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ast +import cachemissarchive +from mockhttprequest import ArchivedHttpRequest +import os +import unittest +import util + + +def get_mock_requests(): + keepends = True + return util.resource_string('mock-archive.txt').splitlines(keepends) + + +class CacheMissArchiveTest(unittest.TestCase): + + HEADERS = [('accept-encoding', 'gzip,deflate')] + REQUEST = ArchivedHttpRequest( + 'GET', 'www.test.com', '/', None, HEADERS) + + def setUp(self): + self.load_mock_archive() + + def load_mock_archive(self): + self.cache_archive = cachemissarchive.CacheMissArchive('mock-archive') + self.num_requests = 0 + urls_list = [ + 'http://www.zappos.com/', + 'http://www.msn.com/', + 'http://www.amazon.com/', + 'http://www.google.com/', + ] + self.cache_archive.set_urls_list(urls_list) + for line in get_mock_requests(): + # Each line contains: (command, host, path, request_body, headers) + # Delimited by '%' + args = line.split('%') + headers = ast.literal_eval(args[4].strip('\n ')) + request = ArchivedHttpRequest( + args[0], args[1], args[2], args[3], headers) + self.cache_archive.record_request(request, is_record_mode=False, + is_cache_miss=True) + self.num_requests += 1 + + def test_init(self): + empty_archive = cachemissarchive.CacheMissArchive('empty-archive') + self.assert_(not empty_archive.archive) + + def test_record_cache_miss(self): + cache_archive = cachemissarchive.CacheMissArchive('empty-archive') + referer = 'mock_referer' + cache_archive.record_cache_miss(self.REQUEST, page_url=referer) + self.assert_(cache_archive.archive[referer]) + + def test__match_urls(self): + self.assert_(self.cache_archive._match_urls( + 'http://www.cnn.com', 'http://www.cnn.com')) + self.assert_(self.cache_archive._match_urls( + 'http://www.cnn.com', 'www.cnn.com')) + self.assert_(not self.cache_archive._match_urls( + 'http://www.zappos.com', 'http://www.cnn.com')) + self.assert_(not self.cache_archive._match_urls( + 'www.zappos.com', 'www.amazon.com')) + + def test_get_total_referers_small(self): + cache_archive = cachemissarchive.CacheMissArchive('empty-archive') + self.assertEqual(cache_archive.get_total_referers(), 0) + referer = 'mock_referer' + cache_archive.record_cache_miss(self.REQUEST, page_url=referer) + self.assertEqual(cache_archive.get_total_referers(), 1) + + def test_get_total_referers_large(self): + self.assertEqual(self.cache_archive.get_total_referers(), 4) + + def test_get_total_cache_misses(self): + self.assertEqual(self.cache_archive.get_total_cache_misses(), + self.num_requests) + + def test_get_total_referer_cache_misses(self): + self.assertEqual(self.cache_archive.get_total_referer_cache_misses(), + self.num_requests) + + def test_record_request(self): + request = self.REQUEST + cache_archive = cachemissarchive.CacheMissArchive('empty-archive') + self.assertEqual(len(cache_archive.request_counts), 0) + + cache_archive.record_request(request, is_record_mode=True, + is_cache_miss=False) + self.assertEqual(len(cache_archive.request_counts), 1) + self.assertEqual(cache_archive.request_counts[request], (1, 0)) + + cache_archive.record_request(request, is_record_mode=False, + is_cache_miss=False) + self.assertEqual(len(cache_archive.request_counts), 1) + self.assertEqual(cache_archive.request_counts[request], (1, 1)) + + def test_get_cache_misses(self): + self.assertEqual( + len(self.cache_archive.get_cache_misses('http://www.zappos.com/')), 5) + self.assertEqual( + len(self.cache_archive.get_cache_misses('http://www.msn.com/')), 3) + self.assertEqual( + 
len(self.cache_archive.get_cache_misses('http://www.google.com/')), 1) + self.assertEqual( + len(self.cache_archive.get_cache_misses('http://www.amazon.com/')), 1) + +if __name__ == '__main__': + unittest.main() diff --git a/wpr/customhandlers.py b/wpr/customhandlers.py index dad6324..0c4a358 100644 --- a/wpr/customhandlers.py +++ b/wpr/customhandlers.py @@ -13,64 +13,102 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Handle special HTTP requests. + +/web-page-replay-generate-[RESPONSE_CODE] + - Return the given RESPONSE_CODE. +/web-page-replay-post-image-[FILENAME] + - Save the posted image to local disk. +/web-page-replay-command-[record|replay|status] + - Optional. Enable by calling custom_handlers.add_server_manager_handler(...). + - Change the server mode to either record or replay. + + When switching to record, the http_archive is cleared. + + When switching to replay, the http_archive is maintained. +""" + import base64 +import httparchive +import httplib +import json import logging import os -GENERATOR_URL_PREFIX = '/web-page-replay-generate-' -POST_IMAGE_URL_PREFIX = '/web-page-replay-post-image-' +COMMON_URL_PREFIX = '/web-page-replay-' +COMMAND_URL_PREFIX = COMMON_URL_PREFIX + 'command-' +GENERATOR_URL_PREFIX = COMMON_URL_PREFIX + 'generate-' +POST_IMAGE_URL_PREFIX = COMMON_URL_PREFIX + 'post-image-' IMAGE_DATA_PREFIX = 'data:image/png;base64,' +def SimpleResponse(status): + """Return a ArchivedHttpResponse with |status| code and a simple text body.""" + return httparchive.create_response(status) + + +def JsonResponse(data): + """Return a ArchivedHttpResponse with |data| encoded as json in the body.""" + status = 200 + reason = 'OK' + headers = [('content-type', 'application/json')] + body = json.dumps(data) + return httparchive.create_response(status, reason, headers, body) + + class CustomHandlers(object): def __init__(self, screenshot_dir=None): - if screenshot_dir and not os.path.exists(screenshot_dir): - try: - os.makedirs(screenshot_dir) - except: - logging.error('%s does not exist and could not be created.', - screenshot_dir) - screenshot_dir = None - self.screenshot_dir = screenshot_dir + """Initialize CustomHandlers. + + Args: + screenshot_dir: a path to which screenshots are saved. + """ + self.handlers = [ + (GENERATOR_URL_PREFIX, self.get_generator_url_response_code)] + if screenshot_dir: + if not os.path.exists(screenshot_dir): + try: + os.makedirs(screenshot_dir) + except IOError: + logging.error('Unable to create screenshot dir: %s', screenshot_dir) + screenshot_dir = None + if screenshot_dir: + self.screenshot_dir = screenshot_dir + self.handlers.append( + (POST_IMAGE_URL_PREFIX, self.handle_possible_post_image)) def handle(self, request): - """Handles special URLs needed for the benchmark. + """Dispatches requests to matching handlers. Args: request: an http request Returns: - If request is for a special URL, a 3-digit integer like 404. - Otherwise, None. + ArchivedHttpResponse or None. 
""" - response_code = self.get_generator_url_response_code(request.path) - if response_code: - return response_code - - response_code = self.handle_possible_post_image(request) - if response_code: - return response_code - + for prefix, handler in self.handlers: + if request.path.startswith(prefix): + return handler(request, request.path[len(prefix):]) return None - def get_generator_url_response_code(self, request_path): + def get_generator_url_response_code(self, request, url_suffix): """Parse special generator URLs for the embedded response code. Clients like perftracker can use URLs of this form to request a response with a particular response code. Args: - request_path: a string like "/foo", or "/web-page-replay-generator-404" + request: an ArchivedHttpRequest instance + url_suffix: string that is after the handler prefix (e.g. 304) Returns: - On a match, a 3-digit integer like 404. + On a match, an ArchivedHttpResponse. Otherwise, None. """ - prefix, response_code = request_path[:-3], request_path[-3:] - if prefix == GENERATOR_URL_PREFIX and response_code.isdigit(): - return int(response_code) - return None + try: + response_code = int(url_suffix) + return SimpleResponse(response_code) + except ValueError: + return None - def handle_possible_post_image(self, request): + def handle_possible_post_image(self, request, url_suffix): """If sent, saves embedded image to local directory. Expects a special url containing the filename. If sent, saves the base64 @@ -78,24 +116,20 @@ class CustomHandlers(object): passing in screenshot_dir to the initializer for this class. Args: - request: an http request - + request: an ArchivedHttpRequest instance + url_suffix: string that is after the handler prefix (e.g. 'foo.png') Returns: - On a match, a 3-digit integer response code. - False otherwise. + On a match, an ArchivedHttpResponse. + Otherwise, None. """ - if not self.screenshot_dir: - return None - - prefix = request.path[:len(POST_IMAGE_URL_PREFIX)] - basename = request.path[len(POST_IMAGE_URL_PREFIX):] - if prefix != POST_IMAGE_URL_PREFIX or not basename: + basename = url_suffix + if not basename: return None data = request.request_body if not data.startswith(IMAGE_DATA_PREFIX): logging.error('Unexpected image format for: %s', basename) - return 400 + return SimpleResponse(400) data = data[len(IMAGE_DATA_PREFIX):] png = base64.b64decode(data) @@ -103,8 +137,47 @@ class CustomHandlers(object): '%s-%s.png' % (request.host, basename)) if not os.access(self.screenshot_dir, os.W_OK): logging.error('Unable to write to: %s', filename) - return 400 + return SimpleResponse(400) with file(filename, 'w') as f: f.write(png) - return 200 + return SimpleResponse(200) + + def add_server_manager_handler(self, server_manager): + """Add the ability to change the server mode (e.g. to record mode). + Args: + server_manager: a servermanager.ServerManager instance. + """ + self.server_manager = server_manager + self.handlers.append( + (COMMAND_URL_PREFIX, self.handle_server_manager_command)) + + def handle_server_manager_command(self, request, url_suffix): + """Parse special URLs for the embedded server manager command. + + Clients like webpagetest.org can use URLs of this form to change + the replay server from record mode to replay mode. + + This handler is not in the default list of handlers. Call + add_server_manager_handler to add it. + + In the future, this could be expanded to save or serve archive files. 
+ + Args: + request: an ArchivedHttpRequest instance + url_suffix: string that is after the handler prefix (e.g. 'record') + Returns: + On a match, an ArchivedHttpResponse. + Otherwise, None. + """ + command = url_suffix + if command == 'record': + self.server_manager.SetRecordMode() + return SimpleResponse(200) + elif command == 'replay': + self.server_manager.SetReplayMode() + return SimpleResponse(200) + elif command == 'status': + is_record_mode = self.server_manager.IsRecordMode() + return JsonResponse({'is_record_mode': is_record_mode}) + return None diff --git a/wpr/daemonserver.py b/wpr/daemonserver.py old mode 100755 new mode 100644 diff --git a/wpr/deterministic.js b/wpr/deterministic.js new file mode 100644 index 0000000..291c0c5 --- /dev/null +++ b/wpr/deterministic.js @@ -0,0 +1,42 @@ +(function () { + var orig_date = Date; + var random_count = 0; + var date_count = 0; + var random_seed = 0.462; + var time_seed = 1204251968254; + var random_count_threshold = 25; + var date_count_threshold = 25; + Math.random = function() { + random_count++; + if (random_count > random_count_threshold){ + random_seed += 0.1; + random_count = 1; + } + return (random_seed % 1); + }; + Date = function() { + if (this instanceof Date) { + date_count++; + if (date_count > date_count_threshold){ + time_seed += 50; + date_count = 1; + } + switch (arguments.length) { + case 0: return new orig_date(time_seed); + case 1: return new orig_date(arguments[0]); + default: return new orig_date(arguments[0], arguments[1], + arguments.length >= 3 ? arguments[2] : 1, + arguments.length >= 4 ? arguments[3] : 0, + arguments.length >= 5 ? arguments[4] : 0, + arguments.length >= 6 ? arguments[5] : 0, + arguments.length >= 7 ? arguments[6] : 0); + } + } + return new Date().toString(); + }; + Date.__proto__ = orig_date; + Date.prototype.constructor = Date; + orig_date.now = function() { + return new Date().getTime(); + }; +})(); diff --git a/wpr/dnsproxy.py b/wpr/dnsproxy.py old mode 100755 new mode 100644 index 6f940aa..b8fe951 --- a/wpr/dnsproxy.py +++ b/wpr/dnsproxy.py @@ -16,13 +16,16 @@ import daemonserver import errno import logging -import platformsettings import socket import SocketServer import threading import third_party +import dns.flags +import dns.message +import dns.rcode import dns.resolver +import dns.rdatatype import ipaddr @@ -31,18 +34,21 @@ class DnsProxyException(Exception): class RealDnsLookup(object): - def __init__(self, name_servers=None): + def __init__(self, name_servers): + if '127.0.0.1' in name_servers: + raise DnsProxyException( + 'Invalid nameserver: 127.0.0.1 (causes an infinte loop)') self.resolver = dns.resolver.get_default_resolver() - self.resolver.nameservers = [ - platformsettings.get_platform_settings().get_original_primary_dns()] + self.resolver.nameservers = name_servers self.dns_cache_lock = threading.Lock() self.dns_cache = {} - def __call__(self, hostname): + def __call__(self, hostname, rdtype=dns.rdatatype.A): """Return real IP for a host. Args: host: a hostname ending with a period (e.g. "www.google.com.") + rdtype: the query type (1 for 'A', 28 for 'AAAA') Returns: the IP address as a string (e.g. "192.168.25.2") """ @@ -50,54 +56,72 @@ class RealDnsLookup(object): ip = self.dns_cache.get(hostname) self.dns_cache_lock.release() if ip: - logging.debug('_real_dns_lookup(%s) cache hit! 
-> %s', hostname, ip)
       return ip
     try:
-      answers = self.resolver.query(hostname, 'A')
-    except (dns.resolver.NoAnswer,
-            dns.resolver.NXDOMAIN,
-            dns.resolver.Timeout) as ex:
+      answers = self.resolver.query(hostname, rdtype)
+    except dns.resolver.NXDOMAIN:
+      return None
+    except (dns.resolver.NoAnswer, dns.resolver.Timeout) as ex:
       logging.debug('_real_dns_lookup(%s) -> None (%s)',
                     hostname, ex.__class__.__name__)
       return None
     if answers:
       ip = str(answers[0])
-      logging.debug('_real_dns_lookup(%s) -> %s', hostname, ip)
     self.dns_cache_lock.acquire()
     self.dns_cache[hostname] = ip
     self.dns_cache_lock.release()
     return ip
 
+  def ClearCache(self):
+    """Clear the DNS cache."""
+    self.dns_cache_lock.acquire()
+    self.dns_cache.clear()
+    self.dns_cache_lock.release()
 
-class DnsPrivatePassthroughFilter:
-  """Allow private hosts to resolve to their real IPs."""
-  def __init__(self, real_dns_lookup, skip_passthrough_hosts=()):
-    """Initialize DnsPrivatePassthroughFilter.
+
+class PrivateIpDnsLookup(object):
+  """Resolve private hosts to their real IPs and others to the Web proxy IP.
+
+  Hosts in the given http_archive will resolve to the Web proxy IP without
+  checking the real IP.
+
+  This only supports IPv4 lookups.
+  """
+  def __init__(self, web_proxy_ip, real_dns_lookup, http_archive):
+    """Initialize PrivateIpDnsLookup.
 
     Args:
+      web_proxy_ip: the IP address returned by __call__ for non-private hosts.
       real_dns_lookup: a function that resolves a host to an IP.
-      skip_passthrough_hosts: an iterable of hosts that skip
-          the private determination (i.e. avoids a real dns lookup
-          for them).
+      http_archive: an instance of a HttpArchive.
+          Hosts in the archive will always resolve to the web_proxy_ip.
     """
+    self.web_proxy_ip = web_proxy_ip
     self.real_dns_lookup = real_dns_lookup
-    self.skip_passthrough_hosts = set(
-        host + '.' for host in skip_passthrough_hosts)
+    self.http_archive = http_archive
+    self.InitializeArchiveHosts()
 
   def __call__(self, host):
-    """Return real IP for host if private.
+    """Return real IPv4 for private hosts and Web proxy IP otherwise.
 
     Args:
       host: a hostname ending with a period (e.g. "www.google.com.")
     Returns:
-      If private, the real IP address as a string (e.g. 192.168.25.2)
-      Otherwise, None.
+      IP address as a string or None (if lookup fails)
     """
-    if host not in self.skip_passthrough_hosts:
+    ip = self.web_proxy_ip
+    if host not in self.archive_hosts:
       real_ip = self.real_dns_lookup(host)
-      if real_ip and ipaddr.IPv4Address(real_ip).is_private:
-        return real_ip
-    return None
+      if real_ip:
+        if ipaddr.IPAddress(real_ip).is_private:
+          ip = real_ip
+      else:
+        ip = None
+    return ip
+
+  def InitializeArchiveHosts(self):
+    """Recompute the archive_hosts from the http_archive."""
+    self.archive_hosts = set('%s.' % req.host for req in self.http_archive)
 
 
 class UdpDnsHandler(SocketServer.DatagramRequestHandler):
@@ -110,6 +134,13 @@ class UdpDnsHandler(SocketServer.DatagramRequestHandler):
   STANDARD_QUERY_OPERATION_CODE = 0
 
   def handle(self):
+    """Handle a DNS query.
+
+    IPv6 requests (with rdtype AAAA) receive mismatched IPv4 responses
+    (with rdtype A). To properly support IPv6, the http proxy would
+    need both types of addresses. By default, Windows XP does not
+    support IPv6.
+ """ self.data = self.rfile.read() self.transaction_id = self.data[0] self.flags = self.data[1] @@ -122,15 +153,17 @@ class UdpDnsHandler(SocketServer.DatagramRequestHandler): else: logging.debug("DNS request with non-zero operation code: %s", operation_code) - real_ip = self.server.passthrough_filter(self.domain) - if real_ip: - message = 'passthrough' - ip = real_ip + ip = self.server.dns_lookup(self.domain) + if ip is None: + logging.debug('dnsproxy: %s -> NXDOMAIN', self.domain) + response = self.get_dns_no_such_name_response() else: - message = 'handle' - ip = self.server.server_address[0] - logging.debug('dnsproxy: %s(%s) -> %s', message, self.domain, ip) - self.reply(self.get_dns_reply(ip)) + if ip == self.server.server_address[0]: + logging.debug('dnsproxy: %s -> %s (replay web proxy)', self.domain, ip) + else: + logging.debug('dnsproxy: %s -> %s', self.domain, ip) + response = self.get_dns_response(ip) + self.wfile.write(response) @classmethod def _domain(cls, wire_domain): @@ -143,10 +176,7 @@ class UdpDnsHandler(SocketServer.DatagramRequestHandler): length = ord(wire_domain[index]) return domain - def reply(self, buf): - self.wfile.write(buf) - - def get_dns_reply(self, ip): + def get_dns_response(self, ip): packet = '' if self.domain: packet = ( @@ -164,48 +194,35 @@ class UdpDnsHandler(SocketServer.DatagramRequestHandler): ) return packet + def get_dns_no_such_name_response(self): + query_message = dns.message.from_wire(self.data) + response_message = dns.message.make_response(query_message) + response_message.flags |= dns.flags.AA | dns.flags.RA + response_message.set_rcode(dns.rcode.NXDOMAIN) + return response_message.to_wire() class DnsProxyServer(SocketServer.ThreadingUDPServer, daemonserver.DaemonServer): - def __init__(self, use_forwarding, passthrough_filter=None, host='', port=53, handler=UdpDnsHandler): + def __init__(self, dns_lookup=None, host='', port=53): """Initialize DnsProxyServer. Args: - use_forwarding: a boolean that if true, changes primary DNS to host. - passthrough_filter: a function that resolves a host to its real IP, - or None, if it should resolve to the dnsproxy's address. + dns_lookup: a function that resolves a host to an IP address. host: a host string (name or IP) to bind the dns proxy and to which DNS requests will be resolved. port: an integer port on which to bind the proxy. 
""" - self.use_forwarding = use_forwarding - self.passthrough_filter = passthrough_filter or (lambda host: None) - self.platform_settings = platformsettings.get_platform_settings() try: SocketServer.ThreadingUDPServer.__init__( - self, (host, port), handler) + self, (host, port), UdpDnsHandler) except socket.error, (error_number, msg): if error_number == errno.EACCES: raise DnsProxyException( 'Unable to bind DNS server on (%s:%s)' % (host, port)) raise + self.dns_lookup = dns_lookup or (lambda host: self.server_address[0]) logging.info('Started DNS server on %s...', self.server_address) - if self.use_forwarding: - self.platform_settings.set_primary_dns(host) def cleanup(self): - if self.use_forwarding: - self.platform_settings.restore_primary_dns() self.shutdown() logging.info('Shutdown DNS server') - - -class DummyDnsServer(): - def __init__(self, use_forwarding, passthrough_filter=None, host='', port=53): - pass - - def __enter__(self): - pass - - def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb): - pass diff --git a/wpr/httparchive.py b/wpr/httparchive.py index 96e25ea..efd693c 100755 --- a/wpr/httparchive.py +++ b/wpr/httparchive.py @@ -32,125 +32,185 @@ To edit a particular URL: """ import difflib +import email.utils +import httplib import httpzlib +import json import logging import optparse import os import persistentmixin -import re import StringIO import subprocess +import sys import tempfile +import urlparse - -HTML_RE = re.compile(r']*>', re.IGNORECASE) -HEAD_RE = re.compile(r']*>', re.IGNORECASE) -DETERMINISTIC_SCRIPT = """ - -""" +import platformsettings class HttpArchiveException(Exception): + """Base class for all exceptions in httparchive.""" pass -class InjectionFailedException(HttpArchiveException): - def __init__(self, text): - self.text = text - - def __str__(self): - return repr(text) - -def _InsertScriptAfter(matchobj): - return matchobj.group(0) + DETERMINISTIC_SCRIPT - class HttpArchive(dict, persistentmixin.PersistentMixin): """Dict with ArchivedHttpRequest keys and ArchivedHttpResponse values. PersistentMixin adds CreateNew(filename), Load(filename), and Persist(). + + Attributes: + server_rtt: dict of {hostname, server rtt in milliseconds} """ - def get_requests(self, command=None, host=None, path=None): - """Retruns a list of all requests matching giving params.""" - return [r for r in self if r.matches(command, host, path)] + def __init__(self): + self.server_rtt = {} + + def get_server_rtt(self, server): + """Retrieves the round trip time (rtt) to the server + + Args: + server: the hostname of the server + + Returns: + round trip time to the server in seconds, or 0 if unavailable + """ + if server not in self.server_rtt: + platform_settings = platformsettings.get_platform_settings() + self.server_rtt[server] = platform_settings.ping(server) + return self.server_rtt[server] + + def get(self, request, default=None): + """Return the archived response for a given request. + + Does extra checking for handling some HTTP request headers. + + Args: + request: instance of ArchivedHttpRequest + default: default value to return if request is not found + + Returns: + Instance of ArchivedHttpResponse or default if no matching + response is found + """ + if request in self: + return self[request] + return self.get_conditional_response(request, default) + + def get_conditional_response(self, request, default): + """Get the response based on the conditional HTTP request headers. + + Args: + request: an ArchivedHttpRequest representing the original request. 
+ default: default ArchivedHttpResponse + original request with matched headers removed. + + Returns: + an ArchivedHttpResponse with a status of 200, 302 (not modified), or + 412 (precondition failed) + """ + response = default + if request.is_conditional(): + stripped_request = request.create_request_without_conditions() + if stripped_request in self: + response = self[stripped_request] + if response.status == 200: + status = self.get_conditional_status(request, response) + if status != 200: + response = create_response(status) + return response + + def get_conditional_status(self, request, response): + status = 200 + last_modified = email.utils.parsedate( + response.get_header_case_insensitive('last-modified')) + response_etag = response.get_header_case_insensitive('etag') + is_get_or_head = request.command.upper() in ('GET', 'HEAD') + + match_value = request.headers.get('if-match', None) + if match_value: + if self.is_etag_match(match_value, response_etag): + status = 200 + else: + status = 412 # precondition failed + none_match_value = request.headers.get('if-none-match', None) + if none_match_value: + if self.is_etag_match(none_match_value, response_etag): + status = 304 + elif is_get_or_head: + status = 200 + else: + status = 412 + if is_get_or_head and last_modified: + for header in ('if-modified-since', 'if-unmodified-since'): + date = email.utils.parsedate(request.headers.get(header, None)) + if date: + if ((header == 'if-modified-since' and last_modified > date) or + (header == 'if-unmodified-since' and last_modified < date)): + if status != 412: + status = 200 + else: + status = 304 # not modified + return status + + def is_etag_match(self, request_etag, response_etag): + """Determines whether the entity tags of the request/response matches. 
+ + Args: + request_etag: the value string of the "if-(none)-match:" + portion of the request header + response_etag: the etag value of the response + + Returns: + True on match, False otherwise + """ + response_etag = response_etag.strip('" ') + for etag in request_etag.split(','): + etag = etag.strip('" ') + if etag in ('*', response_etag): + return True + return False + + def get_requests(self, command=None, host=None, path=None, use_query=True): + """Return a list of requests that match the given args.""" + return [r for r in self if r.matches(command, host, path, + use_query=use_query)] def ls(self, command=None, host=None, path=None): """List all URLs that match given params.""" - out = StringIO.StringIO() - for request in self.get_requests(command, host, path): - print >>out, '%s %s%s %s' % (request.command, request.host, request.path, - request.headers) - return out.getvalue() + return ''.join(sorted( + '%s\n' % r for r in self.get_requests(command, host, path))) def cat(self, command=None, host=None, path=None): """Print the contents of all URLs that match given params.""" out = StringIO.StringIO() for request in self.get_requests(command, host, path): - print >>out, '%s %s %s\nrequest headers:\n' % ( - request.command, request.host, request.path) - for k, v in sorted(request.headers): - print >>out, " %s: %s" % (k, v) + print >>out, str(request) + print >>out, 'Untrimmed request headers:' + for k in request.headers: + print >>out, ' %s: %s' % (k, request.headers[k]) if request.request_body: print >>out, request.request_body - print >>out, '-' * 70 + print >>out, '---- Response Info', '-' * 51 response = self[request] - print >>out, 'Status: %s\nReason: %s\nheaders:\n' % ( - response.status, response.reason) - for k, v in sorted(response.headers): - print >>out, " %s: %s" % (k, v) - headers = dict(response.headers) + chunk_lengths = [len(x) for x in response.response_data] + print >>out, ('Status: %s\n' + 'Reason: %s\n' + 'Headers delay: %s\n' + 'Response headers:') % ( + response.status, response.reason, response.delays['headers']) + for k, v in response.headers: + print >>out, ' %s: %s' % (k, v) + print >>out, ('Chunk count: %s\n' + 'Chunk lengths: %s\n' + 'Chunk delays: %s') % ( + len(chunk_lengths), chunk_lengths, response.delays['data']) body = response.get_data_as_text() + print >>out, '---- Response Data', '-' * 51 if body: - print >>out, '-' * 70 print >>out, body + else: + print >>out, '[binary data]' print >>out, '=' * 70 return out.getvalue() @@ -172,76 +232,209 @@ class HttpArchive(dict, persistentmixin.PersistentMixin): response = self[matching_requests[0]] tmp_file = tempfile.NamedTemporaryFile(delete=False) - tmp_file.write(response.get_data_as_text()) + tmp_file.write(response.get_response_as_text()) tmp_file.close() subprocess.check_call([editor, tmp_file.name]) - response.set_data(''.join(open(tmp_file.name).readlines())) + response.set_response_from_text(''.join(open(tmp_file.name).readlines())) os.remove(tmp_file.name) - def diff(self, request): - request_repr = request.verbose_repr() - best_similarity = None - best_candidate_repr = None - for candidate in self.get_requests(request.command, request.host): - candidate_repr = candidate.verbose_repr() - similarity = difflib.SequenceMatcher(a=request_repr, - b=candidate_repr).ratio() - if best_similarity is None or similarity > best_similarity: - best_similarity = similarity - best_candidate_repr = candidate_repr + def _format_request_lines(self, req): + """Format request to make diffs easier to read. 
- delta = None - if best_candidate_repr: - delta = ''.join(difflib.ndiff(best_candidate_repr.splitlines(1), - request_repr.splitlines(1))) - return delta + Args: + req: an ArchivedHttpRequest + Returns: + Example: + ['GET www.example.com/path\n', 'Header-Key: header value\n', ...] + """ + parts = ['%s %s%s\n' % (req.command, req.host, req.path)] + if req.request_body: + parts.append('%s\n' % req.request_body) + for k, v in req.trimmed_headers: + k = '-'.join(x.capitalize() for x in k.split('-')) + parts.append('%s: %s\n' % (k, v)) + return parts + + def find_closest_request(self, request, use_path=False): + """Find the closest matching request in the archive to the given request. + + Args: + request: an ArchivedHttpRequest + use_path: If True, closest matching request's path component must match. + (Note: this refers to the 'path' component within the URL, not the + query string component.) + If use_path=False, candidate will NOT match in example below + e.g. request = GET www.test.com/path?aaa + candidate = GET www.test.com/diffpath?aaa + Returns: + If a close match is found, return the instance of ArchivedHttpRequest. + Otherwise, return None. + """ + best_match = None + request_lines = self._format_request_lines(request) + matcher = difflib.SequenceMatcher(b=''.join(request_lines)) + path = None + if use_path: + path = request.path + for candidate in self.get_requests(request.command, request.host, path, + use_query=not use_path): + candidate_lines = self._format_request_lines(candidate) + matcher.set_seq1(''.join(candidate_lines)) + best_match = max(best_match, (matcher.ratio(), candidate)) + if best_match: + return best_match[1] + return None + + def diff(self, request): + """Diff the given request to the closest matching request in the archive. + + Args: + request: an ArchivedHttpRequest + Returns: + If a close match is found, return a textual diff between the requests. + Otherwise, return None. + """ + request_lines = self._format_request_lines(request) + closest_request = self.find_closest_request(request) + if closest_request: + closest_request_lines = self._format_request_lines(closest_request) + return ''.join(difflib.ndiff(closest_request_lines, request_lines)) + return None class ArchivedHttpRequest(object): - def __init__(self, command, host, path, request_body, headers): + """Record all the state that goes into a request. + + ArchivedHttpRequest instances are considered immutable so they can + serve as keys for HttpArchive instances. + (The immutability is not enforced.) + + Upon creation, the headers are "trimmed" (i.e. edited or dropped) + and saved to self.trimmed_headers to allow requests to match in a wider + variety of playback situations (e.g. using different user agents). + + For unpickling, 'trimmed_headers' is recreated from 'headers'. That + allows for changes to the trim function and can help with debugging. + """ + CONDITIONAL_HEADERS = [ + 'if-none-match', 'if-match', + 'if-modified-since', 'if-unmodified-since'] + + def __init__(self, command, host, path, request_body, headers, is_ssl=False): + """Initialize an ArchivedHttpRequest. + + Args: + command: a string (e.g. 'GET' or 'POST'). + host: a host name (e.g. 'www.google.com'). + path: a request path (e.g. '/search?q=dogs'). + request_body: a request body string for a POST or None. + headers: {key: value, ...} where key and value are strings. + is_ssl: a boolean which is True iff request is make via SSL. 
+ """ self.command = command self.host = host self.path = path self.request_body = request_body - self.headers = self._FuzzHeaders(headers) + self.headers = headers + self.is_ssl = is_ssl + self.trimmed_headers = self._TrimHeaders(headers) + + def __str__(self): + scheme = 'https' if self.is_ssl else 'http' + return '%s %s://%s%s %s' % ( + self.command, scheme, self.host, self.path, self.trimmed_headers) def __repr__(self): return repr((self.command, self.host, self.path, self.request_body, - self.headers)) + self.trimmed_headers, self.is_ssl)) def __hash__(self): - return hash(self.__repr__()) + """Return a integer hash to use for hashed collections including dict.""" + return hash(repr(self)) def __eq__(self, other): - return self.__repr__() == other.__repr__() + """Define the __eq__ method to match the hash behavior.""" + return repr(self) == repr(other) def __setstate__(self, state): - if 'headers' not in state: - error_msg = ('Archived HTTP requests are missing headers. Your HTTP ' - 'archive is likely from a previous version and must be ' - 'recorded again.') - raise Exception(error_msg) - self.__dict__ = state + """Influence how to unpickle. - def matches(self, command=None, host=None, path=None): - """Returns true iff the request matches all parameters.""" + "headers" are the original request headers. + "trimmed_headers" are the trimmed headers used for matching requests + during replay. + + Args: + state: a dictionary for __dict__ + """ + if 'full_headers' in state: + # Fix older version of archive. + state['headers'] = state['full_headers'] + del state['full_headers'] + if 'headers' not in state: + raise HttpArchiveException( + 'Archived HTTP request is missing "headers". The HTTP archive is' + ' likely from a previous version and must be re-recorded.') + state['trimmed_headers'] = self._TrimHeaders(dict(state['headers'])) + if 'is_ssl' not in state: + state['is_ssl'] = False + self.__dict__.update(state) + + def __getstate__(self): + """Influence how to pickle. + + Returns: + a dict to use for pickling + """ + state = self.__dict__.copy() + del state['trimmed_headers'] + return state + + def matches(self, command=None, host=None, path_with_query=None, + use_query=True): + """Returns true iff the request matches all parameters. + + Args: + command: a string (e.g. 'GET' or 'POST'). + host: a host name (e.g. 'www.google.com'). + path_with_query: a request path with query string (e.g. '/search?q=dogs') + use_query: + If use_query is True, request matching uses both the hierarchical path + and query string component. + If use_query is False, request matching only uses the hierarchical path + + e.g. 
req1 = GET www.test.com/index?aaaa + req2 = GET www.test.com/index?bbbb + + If use_query is True, req1.matches(req2) evaluates to False + If use_query is False, req1.matches(req2) evaluates to True + + Returns: + True iff the request matches all parameters + """ + path_match = path_with_query == self.path + if not use_query: + self_path = urlparse.urlparse('http://%s%s' % ( + self.host or '', self.path or '')).path + other_path = urlparse.urlparse('http://%s%s' % ( + host or '', path_with_query or '')).path + path_match = self_path == other_path return ((command is None or command == self.command) and (host is None or host == self.host) and - (path is None or path == self.path)) + (path_with_query is None or path_match)) - def verbose_repr(self): - return '\n'.join([str(x) for x in - [self.command, self.host, self.path, self.request_body] + self.headers]) - - def _FuzzHeaders(self, headers): + @classmethod + def _TrimHeaders(cls, headers): """Removes headers that are known to cause problems during replay. These headers are removed for the following reasons: - accept: Causes problems with www.bing.com. During record, CSS is fetched with *. During replay, it's text/css. + - accept-charset, accept-language, referer: vary between clients. - connection, method, scheme, url, version: Cause problems with spdy. - cookie: Extremely sensitive to request/response order. + - keep-alive: Not supported by Web Page Replay. - user-agent: Changes with every Chrome version. + - proxy-connection: Sent for proxy requests. Another variant to consider is dropping only the value from the header. However, this is particularly bad for the cookie header, because the @@ -249,53 +442,131 @@ class ArchivedHttpRequest(object): is made. Args: - headers: Dictionary of String -> String headers to values. + headers: {header_key: header_value, ...} Returns: - Dictionary of headers, with undesirable headers removed. + [(header_key, header_value), ...] # (with undesirable headers removed) """ - fuzzed_headers = headers.copy() - undesirable_keys = ['accept', 'connection', 'cookie', 'method', 'scheme', - 'url', 'version', 'user-agent'] - keys_to_delete = [] - for key in fuzzed_headers: - if key.lower() in undesirable_keys: - keys_to_delete.append(key) - for key in keys_to_delete: - del fuzzed_headers[key] - return [(k, fuzzed_headers[k]) for k in sorted(fuzzed_headers.keys())] + # TODO(tonyg): Strip sdch from the request headers because we can't + # guarantee that the dictionary will be recorded, so replay may not work. 
+    if 'accept-encoding' in headers:
+      headers['accept-encoding'] = headers['accept-encoding'].replace(
+          'sdch', '')
+      # A little clean-up
+      if headers['accept-encoding'].endswith(','):
+        headers['accept-encoding'] = headers['accept-encoding'][:-1]
+    undesirable_keys = [
+        'accept', 'accept-charset', 'accept-language',
+        'connection', 'cookie', 'keep-alive', 'method',
+        'referer', 'scheme', 'url', 'version', 'user-agent', 'proxy-connection']
+    return sorted([(k, v) for k, v in headers.items()
+                   if k.lower() not in undesirable_keys])
+
+  def is_conditional(self):
+    """Return True if the request has a conditional header."""
+    for header in self.CONDITIONAL_HEADERS:
+      if header in self.headers:
+        return True
+    return False
+
+  def create_request_without_conditions(self):
+    stripped_headers = dict((k, v) for k, v in self.headers.iteritems()
+                            if k.lower() not in self.CONDITIONAL_HEADERS)
+    return ArchivedHttpRequest(
+        self.command, self.host, self.path, self.request_body,
+        stripped_headers, self.is_ssl)
 
 
 class ArchivedHttpResponse(object):
-  """HTTPResponse objects.
-
-  ArchivedHttpReponse instances have the following attributes:
-    version: HTTP protocol version used by server.
-        10 for HTTP/1.0, 11 for HTTP/1.1 (same as httplib).
-    status: Status code returned by server (e.g. 200).
-    reason: Reason phrase returned by server (e.g. "OK").
-    headers: list of (header, value) tuples.
-    response_data: list of content chunks. Concatenating all the content chunks
-        gives the complete contents (i.e. the chunks do not have any lengths or
-        delimiters).
-  """
+  """All the data needed to recreate an HTTP response."""
 
   # CHUNK_EDIT_SEPARATOR is used to edit and view text content.
   # It is not sent in responses. It is added by get_data_as_text()
   # and removed by set_data().
   CHUNK_EDIT_SEPARATOR = '[WEB_PAGE_REPLAY_CHUNK_BOUNDARY]'
 
-  def __init__(self, version, status, reason, headers, response_data):
+  # DELAY_EDIT_SEPARATOR is used to edit and view server delays.
+  DELAY_EDIT_SEPARATOR = ('\n[WEB_PAGE_REPLAY_EDIT_ARCHIVE --- '
+                          'Delays are above. Response content is below.]\n')
+
+  def __init__(self, version, status, reason, headers, response_data,
+               delays=None):
+    """Initialize an ArchivedHttpResponse.
+
+    Args:
+      version: HTTP protocol version used by server.
+          10 for HTTP/1.0, 11 for HTTP/1.1 (same as httplib).
+      status: Status code returned by server (e.g. 200).
+      reason: Reason phrase returned by server (e.g. "OK").
+      headers: list of (header, value) tuples.
+      response_data: list of content chunks.
+          Concatenating the chunks gives the complete contents
+          (i.e. the chunks do not have any lengths or delimiters).
+          Do not include the final, zero-length chunk that marks the end.
+      delays: dict of (ms) delays before "headers" and "data". For example,
+          {'headers': 50, 'data': [0, 10, 10]}
+    """
     self.version = version
     self.status = status
     self.reason = reason
     self.headers = headers
     self.response_data = response_data
+    self.delays = delays
+    self.fix_delays()
 
-  def get_header(self, key):
+  def fix_delays(self):
+    """Initialize delays, or check the number of data delays."""
+    expected_num_delays = len(self.response_data)
+    if not self.delays:
+      self.delays = {
+          'headers': 0,
+          'data': [0] * expected_num_delays
+          }
+    else:
+      num_delays = len(self.delays['data'])
+      if num_delays != expected_num_delays:
+        raise HttpArchiveException(
+            'Server delay length mismatch: %d (expected %d): %s',
+            num_delays, expected_num_delays, self.delays['data'])
+
+  def __repr__(self):
+    return repr((self.version, self.status, self.reason, sorted(self.headers),
+                 self.response_data))
+
+  def __hash__(self):
+    """Return an integer hash to use for hashed collections including dict."""
+    return hash(repr(self))
+
+  def __eq__(self, other):
+    """Define the __eq__ method to match the hash behavior."""
+    return repr(self) == repr(other)
+
+  def __setstate__(self, state):
+    """Influence how to unpickle.
+
+    Args:
+      state: a dictionary for __dict__
+    """
+    if 'server_delays' in state:
+      state['delays'] = {
+          'headers': 0,
+          'data': state['server_delays']
+          }
+      del state['server_delays']
+    elif 'delays' not in state:
+      state['delays'] = None
+    self.__dict__.update(state)
+    self.fix_delays()
+
+  def get_header(self, key, default=None):
     for k, v in self.headers:
       if key == k:
         return v
+    return default
+
+  def get_header_case_insensitive(self, key):
+    for k, v in self.headers:
+      if key.lower() == k.lower():
+        return v
     return None
 
   def set_header(self, key, value):
@@ -317,6 +588,9 @@ class ArchivedHttpResponse(object):
   def is_compressed(self):
     return self.get_header('content-encoding') in ('gzip', 'deflate')
 
+  def is_chunked(self):
+    return self.get_header('transfer-encoding') == 'chunked'
+
   def get_data_as_text(self):
     """Return content as a single string.
 
@@ -334,8 +608,25 @@
       uncompressed_chunks = self.response_data
     return self.CHUNK_EDIT_SEPARATOR.join(uncompressed_chunks)
 
+  def get_delays_as_text(self):
+    """Return delays as editable text."""
+    return json.dumps(self.delays, indent=2)
+
+  def get_response_as_text(self):
+    """Returns response content as a single string.
+
+    Server delays are separated on a per-chunk basis. Delays are in
+    milliseconds. Response content begins after DELAY_EDIT_SEPARATOR.
+    """
+    data = self.get_data_as_text()
+    if data is None:
+      logging.warning('Data cannot be represented as text.')
+      data = ''
+    delays = self.get_delays_as_text()
+    return self.DELAY_EDIT_SEPARATOR.join((delays, data))
+
   def set_data(self, text):
-    """Inverse of set_data_as_text().
+    """Inverse of get_data_as_text().
 
     Split on CHUNK_EDIT_SEPARATOR and compress if needed.
     """
@@ -344,26 +635,55 @@
       self.response_data = httpzlib.compress_chunks(text_chunks, self.is_gzip())
     else:
       self.response_data = text_chunks
-    if not self.get_header('transfer-encoding'):
+    if not self.is_chunked():
       content_length = sum(len(c) for c in self.response_data)
       self.set_header('content-length', str(content_length))
 
-  def inject_deterministic_script(self):
-    """Inject deterministic script immediately after <head> or <html>."""
-    content_type = self.get_header('content-type')
-    if not content_type or not content_type.startswith('text/html'):
+  def set_delays(self, delays_text):
+    """Inverse of get_delays_as_text().
+
+    Args:
+      delays_text: JSON-encoded text such as the following:
+          {
+            "headers": 80,
+            "data": [6, 55, 0]
+          }
+        Times are in milliseconds.
+        Each data delay corresponds with one response_data value.
+    """
+    try:
+      self.delays = json.loads(delays_text)
+    except (ValueError, KeyError) as e:
+      logging.critical('Unable to parse delays %s: %s', delays_text, e)
+    self.fix_delays()
+
+  def set_response_from_text(self, text):
+    """Inverse of get_response_as_text().
+
+    Modifies the state of the archive according to the textual representation.
+    """
+    try:
+      delays, data = text.split(self.DELAY_EDIT_SEPARATOR)
+    except ValueError:
+      logging.critical(
+          'Error parsing text representation. Skipping edits.')
       return
-    text = self.get_data_as_text()
-    if text:
-      text, is_injected = HEAD_RE.subn(_InsertScriptAfter, text, 1)
-      if not is_injected:
-        text, is_injected = HTML_RE.subn(_InsertScriptAfter, text, 1)
-      if not is_injected:
-        raise InjectionFailedException(text)
-      self.set_data(text)
+    self.set_delays(delays)
+    self.set_data(data)
 
-if __name__ == '__main__':
+def create_response(status, reason=None, headers=None, body=None):
+  """Convenience method for creating simple ArchivedHttpResponse objects."""
+  if reason is None:
+    reason = httplib.responses.get(status, 'Unknown')
+  if headers is None:
+    headers = [('content-type', 'text/plain')]
+  if body is None:
+    body = "%s %s" % (status, reason)
+  return ArchivedHttpResponse(11, status, reason, headers, [body])
+
+
+def main():
   class PlainHelpFormatter(optparse.IndentedHelpFormatter):
     def format_description(self, description):
       if description:
@@ -412,3 +732,8 @@ if __name__ == '__main__':
       http_archive.Persist(replay_file)
     else:
       option_parser.error('Unknown command "%s"' % command)
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/wpr/httparchive_test.py b/wpr/httparchive_test.py
new file mode 100755
index 0000000..5a7b759
--- /dev/null
+++ b/wpr/httparchive_test.py
@@ -0,0 +1,347 @@
+#!/usr/bin/env python
+# Copyright 2011 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import ast
+import httparchive
+import os
+import unittest
+
+
+def create_request(headers):
+  return httparchive.ArchivedHttpRequest(
+      'GET', 'www.test.com', '/', None, headers)
+
+def create_response(headers):
+  return httparchive.ArchivedHttpResponse(
+      11, 200, 'OK', headers, '')
+
+
+class HttpArchiveTest(unittest.TestCase):
+
+  REQUEST_HEADERS = {}
+  REQUEST = create_request(REQUEST_HEADERS)
+
+  # Used for if-(un)modified-since checks
+  DATE_PAST = 'Wed, 13 Jul 2011 03:58:08 GMT'
+  DATE_PRESENT = 'Wed, 20 Jul 2011 04:58:08 GMT'
+  DATE_FUTURE = 'Wed, 27 Jul 2011 05:58:08 GMT'
+  DATE_INVALID = 'This is an invalid date!!'
+
+  # etag values
+  ETAG_VALID = 'etag'
+  ETAG_INVALID = 'This is an invalid etag value!!'
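The delays added to ArchivedHttpResponse round-trip through an editable text
form; a minimal sketch of that contract, assuming the patched httparchive
module is importable (the chunk and delay values are made up):

    import httparchive

    response = httparchive.ArchivedHttpResponse(
        11, 200, 'OK', [('content-type', 'text/plain')],
        ['hello ', 'world'],                       # two content chunks
        delays={'headers': 50, 'data': [0, 10]})   # one delay per chunk
    text = response.get_response_as_text()  # JSON delays + separator + body
    response.set_response_from_text(text)   # inverse: restores delays/chunks

Note that set_data() recomputes content-length for non-chunked responses, so
a round trip may add that header if it was missing.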
+ + RESPONSE_HEADERS = [('last-modified', DATE_PRESENT), ('etag', ETAG_VALID)] + RESPONSE = create_response(RESPONSE_HEADERS) + + def setUp(self): + self.archive = httparchive.HttpArchive() + self.archive[self.REQUEST] = self.RESPONSE + + # Also add an identical POST request for testing + request = httparchive.ArchivedHttpRequest( + 'POST', 'www.test.com', '/', None, self.REQUEST_HEADERS) + self.archive[request] = self.RESPONSE + + def tearDown(self): + pass + + def test_init(self): + archive = httparchive.HttpArchive() + self.assertEqual(len(archive), 0) + + def test__TrimHeaders(self): + request = httparchive.ArchivedHttpRequest + header1 = {'accept-encoding': 'gzip,deflate'} + self.assertEqual(request._TrimHeaders(header1), + [(k, v) for k, v in header1.items()]) + + header2 = {'referer': 'www.google.com'} + self.assertEqual(request._TrimHeaders(header2), []) + + header3 = {'referer': 'www.google.com', 'cookie': 'cookie_monster!', + 'hello': 'world'} + self.assertEqual(request._TrimHeaders(header3), [('hello', 'world')]) + + def test_matches(self): + headers = {} + request1 = httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/index.html?hello=world', None, headers) + request2 = httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/index.html?foo=bar', None, headers) + + self.assert_(not request1.matches( + request2.command, request2.host, request2.path, use_query=True)) + self.assert_(request1.matches( + request2.command, request2.host, request2.path, use_query=False)) + + self.assert_(request1.matches( + request2.command, request2.host, None, use_query=True)) + self.assert_(request1.matches( + request2.command, None, request2.path, use_query=False)) + + empty_request = httparchive.ArchivedHttpRequest( + None, None, None, None, headers) + self.assert_(not empty_request.matches( + request2.command, request2.host, None, use_query=True)) + self.assert_(not empty_request.matches( + request2.command, None, request2.path, use_query=False)) + + def setup_find_closest_request(self): + headers = {} + request1 = httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/a?hello=world', None, headers) + request2 = httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/a?foo=bar', None, headers) + request3 = httparchive.ArchivedHttpRequest( + 'GET', 'www.test.com', '/b?hello=world', None, headers) + + archive = httparchive.HttpArchive() + # Add requests 2 and 3 and find closest match with request1 + archive[request2] = self.RESPONSE + archive[request3] = self.RESPONSE + + return archive, request1, request2, request3 + + def test_find_closest_request(self): + archive, request1, request2, request3 = self.setup_find_closest_request() + + # Request 3 is the closest match to request 1 + self.assertEqual( + request3, archive.find_closest_request(request1, use_path=False)) + # However, if we match strictly on path, request2 is the only match + self.assertEqual( + request2, archive.find_closest_request(request1, use_path=True)) + + def test_find_closest_request_delete_simple(self): + archive, request1, request2, request3 = self.setup_find_closest_request() + + del archive[request3] + self.assertEqual( + request2, archive.find_closest_request(request1, use_path=False)) + self.assertEqual( + request2, archive.find_closest_request(request1, use_path=True)) + + def test_find_closest_request_delete_complex(self): + archive, request1, request2, request3 = self.setup_find_closest_request() + + del archive[request2] + self.assertEqual( + request3, archive.find_closest_request(request1, 
use_path=False)) + self.assertEqual( + None, archive.find_closest_request(request1, use_path=True)) + + def test_get_simple(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + + self.assertEqual(archive.get(request), response) + + false_request_headers = {'foo': 'bar'} + false_request = create_request(false_request_headers) + self.assertEqual(archive.get(false_request, default=None), None) + + def test_get_modified_headers(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + not_modified_response = httparchive.create_response(304) + + # Fail check and return response again + request_headers = {'if-modified-since': self.DATE_PAST} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + # Succeed check and return 304 Not Modified + request_headers = {'if-modified-since': self.DATE_FUTURE} + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # Succeed check and return 304 Not Modified + request_headers = {'if-modified-since': self.DATE_PRESENT} + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # Invalid date, fail check and return response again + request_headers = {'if-modified-since': self.DATE_INVALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + # fail check since the request is not a GET or HEAD request (as per RFC) + request_headers = {'if-modified-since': self.DATE_FUTURE} + request = httparchive.ArchivedHttpRequest( + 'POST', 'www.test.com', '/', None, request_headers) + self.assertEqual(archive.get(request), response) + + def test_get_unmodified_headers(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + not_modified_response = httparchive.create_response(304) + + # Succeed check + request_headers = {'if-unmodified-since': self.DATE_PAST} + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # Fail check + request_headers = {'if-unmodified-since': self.DATE_FUTURE} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + # Succeed check + request_headers = {'if-unmodified-since': self.DATE_PRESENT} + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # Fail check + request_headers = {'if-unmodified-since': self.DATE_INVALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + # Fail check since the request is not a GET or HEAD request (as per RFC) + request_headers = {'if-modified-since': self.DATE_PAST} + request = httparchive.ArchivedHttpRequest( + 'POST', 'www.test.com', '/', None, request_headers) + self.assertEqual(archive.get(request), response) + + def test_get_etags(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + not_modified_response = httparchive.create_response(304) + precondition_failed_response = httparchive.create_response(412) + + # if-match headers + request_headers = {'if-match': self.ETAG_VALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + request_headers = {'if-match': self.ETAG_INVALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), precondition_failed_response) + + # if-none-match headers + request_headers = 
{'if-none-match': self.ETAG_VALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + request_headers = {'if-none-match': self.ETAG_INVALID} + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + def test_get_multiple_match_headers(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + not_modified_response = httparchive.create_response(304) + precondition_failed_response = httparchive.create_response(412) + + # if-match headers + # If the request would, without the If-Match header field, + # result in anything other than a 2xx or 412 status, + # then the If-Match header MUST be ignored. + + request_headers = { + 'if-match': self.ETAG_VALID, + 'if-modified-since': self.DATE_PAST, + } + request = create_request(request_headers) + self.assertEqual(archive.get(request), response) + + # Invalid etag, precondition failed + request_headers = { + 'if-match': self.ETAG_INVALID, + 'if-modified-since': self.DATE_PAST, + } + request = create_request(request_headers) + self.assertEqual(archive.get(request), precondition_failed_response) + + # 304 response; ignore if-match header + request_headers = { + 'if-match': self.ETAG_VALID, + 'if-modified-since': self.DATE_FUTURE, + } + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # 304 response; ignore if-match header + request_headers = { + 'if-match': self.ETAG_INVALID, + 'if-modified-since': self.DATE_PRESENT, + } + request = create_request(request_headers) + self.assertEqual(archive.get(request), not_modified_response) + + # Invalid etag, precondition failed + request_headers = { + 'if-match': self.ETAG_INVALID, + 'if-modified-since': self.DATE_INVALID, + } + request = create_request(request_headers) + self.assertEqual(archive.get(request), precondition_failed_response) + + def test_get_multiple_none_match_headers(self): + request = self.REQUEST + response = self.RESPONSE + archive = self.archive + not_modified_response = httparchive.create_response(304) + precondition_failed_response = httparchive.create_response(412) + + # if-none-match headers + # If the request would, without the If-None-Match header field, + # result in anything other than a 2xx or 304 status, + # then the If-None-Match header MUST be ignored. 
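One plausible shape for the last-modified comparison these cases exercise (a
sketch only; httparchive.get() may implement the check differently, and
is_not_modified is a hypothetical helper, not part of the patch):

    import calendar
    import email.utils

    def is_not_modified(if_modified_since, last_modified):
      # Unparseable dates fail open so the full response is served.
      ims = email.utils.parsedate(if_modified_since)
      lm = email.utils.parsedate(last_modified)
      if ims is None or lm is None:
        return False
      return calendar.timegm(lm) <= calendar.timegm(ims)

With the fixtures above, DATE_PAST yields False (full response), while
DATE_PRESENT and DATE_FUTURE yield True (304 Not Modified), matching the
assertions below.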
+
+    request_headers = {
+        'if-none-match': self.ETAG_VALID,
+        'if-modified-since': self.DATE_PAST,
+    }
+    request = create_request(request_headers)
+    self.assertEqual(archive.get(request), response)
+
+    request_headers = {
+        'if-none-match': self.ETAG_INVALID,
+        'if-modified-since': self.DATE_PAST,
+    }
+    request = create_request(request_headers)
+    self.assertEqual(archive.get(request), response)
+
+    # etag match and unmodified date; return 304 Not Modified
+    request_headers = {
+        'if-none-match': self.ETAG_VALID,
+        'if-modified-since': self.DATE_FUTURE,
+    }
+    request = create_request(request_headers)
+    self.assertEqual(archive.get(request), not_modified_response)
+
+    request_headers = {
+        'if-none-match': self.ETAG_INVALID,
+        'if-modified-since': self.DATE_PRESENT,
+    }
+    request = create_request(request_headers)
+    self.assertEqual(archive.get(request), not_modified_response)
+
+    request_headers = {
+        'if-none-match': self.ETAG_INVALID,
+        'if-modified-since': self.DATE_INVALID,
+    }
+    request = create_request(request_headers)
+    self.assertEqual(archive.get(request), response)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/wpr/httpclient.py b/wpr/httpclient.py
old mode 100755
new mode 100644
index 2790288..7bcc3a5
--- a/wpr/httpclient.py
+++ b/wpr/httpclient.py
@@ -15,9 +15,71 @@
 """Retrieve web resources over http."""
 
+import copy
 import httparchive
 import httplib
 import logging
+import os
+import platformsettings
+import re
+import util
+
+
+HTML_RE = re.compile(r'^.{,256}?<html.*?>', re.IGNORECASE | re.DOTALL)
+HEAD_RE = re.compile(r'^.{,256}?<head.*?>', re.IGNORECASE | re.DOTALL)
+TIMER = platformsettings.get_platform_settings().timer
+
+
+class HttpClientException(Exception):
+  """Base class for all exceptions in httpclient."""
+  pass
+
+
+def GetInjectScript(scripts):
+  """Loads |scripts| from disk and returns a string of their content."""
+  lines = []
+  for script in scripts:
+    if os.path.exists(script):
+      lines += open(script).read()
+    elif util.resource_exists(script):
+      lines += util.resource_string(script)
+    else:
+      raise HttpClientException('Script does not exist: %s', script)
+  return ''.join(lines)
+
+
+def _InjectScripts(response, inject_script):
+  """Injects |inject_script| immediately after <head> or <html>.
+
+  Copies |response| if it is modified.
+
+  Args:
+    response: an ArchivedHttpResponse
+    inject_script: JavaScript string (e.g. "Math.random = function(){...}")
+  Returns:
+    an ArchivedHttpResponse
+  """
+  if type(response) == tuple:
+    logging.warn('tuple response: %s', response)
+  content_type = response.get_header('content-type')
+  if content_type and content_type.startswith('text/html'):
+    text = response.get_data_as_text()
+
+    def InsertScriptAfter(matchobj):
+      return '%s<script>%s</script>' % (matchobj.group(0), inject_script)
+
+    if text and not inject_script in text:
+      text, is_injected = HEAD_RE.subn(InsertScriptAfter, text, 1)
+      if not is_injected:
+        text, is_injected = HTML_RE.subn(InsertScriptAfter, text, 1)
+      if not is_injected:
+        logging.warning('Failed to inject scripts.')
+        logging.debug('Response content: %s', text)
+      else:
+        response = copy.deepcopy(response)
+        response.set_data(text)
+  return response
+
 
 class DetailedHTTPResponse(httplib.HTTPResponse):
   """Preserve details relevant to replaying responses.
 
@@ -27,21 +89,31 @@ class DetailedHTTPResponse(httplib.HTTPResponse):
   """
 
   def read_chunks(self):
-    """Return an array of data.
+    """Return the response body content and timing data.
 
-    The returned chunked have the chunk size and CRLFs stripped off.
+ The returned chunks have the chunk size and CRLFs stripped off. If the response was compressed, the returned data is still compressed. Returns: - [response_body] # non-chunked responses - [response_body_chunk_1, response_body_chunk_2, ...] # chunked responses + (chunks, delays) + chunks: + [response_body] # non-chunked responses + [chunk_1, chunk_2, ...] # chunked responses + delays: + [0] # non-chunked responses + [chunk_1_first_byte_delay, ...] # chunked responses + + The delay for the first body item should be recorded by the caller. """ buf = [] + chunks = [] + delays = [] if not self.chunked: - chunks = [self.read()] + chunks.append(self.read()) + delays.append(0) else: + start = TIMER() try: - chunks = [] while True: line = self.fp.readline() chunk_size = self._read_chunk_size(line) @@ -49,8 +121,10 @@ class DetailedHTTPResponse(httplib.HTTPResponse): raise httplib.IncompleteRead(''.join(chunks)) if chunk_size == 0: break + delays.append(TIMER() - start) chunks.append(self._safe_read(chunk_size)) self._safe_read(2) # skip the CRLF at the end of the chunk + start = TIMER() # Ignore any trailers. while True: @@ -59,7 +133,7 @@ class DetailedHTTPResponse(httplib.HTTPResponse): break finally: self.close() - return chunks + return chunks, delays @classmethod def _read_chunk_size(cls, line): @@ -78,118 +152,223 @@ class DetailedHTTPConnection(httplib.HTTPConnection): response_class = DetailedHTTPResponse -class RealHttpFetch(object): - def __init__(self, real_dns_lookup): - self._real_dns_lookup = real_dns_lookup +class DetailedHTTPSResponse(DetailedHTTPResponse): + """Preserve details relevant to replaying SSL responses.""" + pass - def __call__(self, request, headers): - """Fetch an HTTP request and return the response and response_body. +class DetailedHTTPSConnection(httplib.HTTPSConnection): + """Preserve details relevant to replaying SSL connections.""" + response_class = DetailedHTTPSResponse + + +class RealHttpFetch(object): + def __init__(self, real_dns_lookup, get_server_rtt): + """Initialize RealHttpFetch. Args: - request: an instance of an ArchivedHttpRequest - headers: a dict of HTTP headers - Returns: - (instance of httplib.HTTPResponse, - [response_body_chunk_1, response_body_chunk_2, ...]) - # If the response did not use chunked encoding, there is only one chunk. + real_dns_lookup: a function that resolves a host to an IP. + get_server_rtt: a function that returns the round-trip time of a host. """ - # TODO(tonyg): Strip sdch from the request headers because we can't - # guarantee that the dictionary will be recorded, so replay may not work. - if 'accept-encoding' in headers: - headers['accept-encoding'] = headers['accept-encoding'].replace( - 'sdch', '') + self._real_dns_lookup = real_dns_lookup + self._get_server_rtt = get_server_rtt - logging.debug('RealHttpRequest: %s %s', request.host, request.path) + def __call__(self, request): + """Fetch an HTTP request. 
+ + Args: + request: an ArchivedHttpRequest + Returns: + an ArchivedHttpResponse + """ + logging.debug('RealHttpFetch: %s %s', request.host, request.path) host_ip = self._real_dns_lookup(request.host) if not host_ip: logging.critical('Unable to find host ip for name: %s', request.host) - return None, None - try: - connection = DetailedHTTPConnection(host_ip) - connection.request( - request.command, - request.path, - request.request_body, - headers) - response = connection.getresponse() - chunks = response.read_chunks() - return response, chunks - except Exception, e: - logging.critical('Could not fetch %s: %s', request, e) - import traceback - logging.critical(traceback.format_exc()) - return None, None + return None + retries = 3 + while True: + try: + if request.is_ssl: + connection = DetailedHTTPSConnection(host_ip) + else: + connection = DetailedHTTPConnection(host_ip) + start = TIMER() + connection.request( + request.command, + request.path, + request.request_body, + request.headers) + response = connection.getresponse() + headers_delay = int((TIMER() - start) * 1000) + headers_delay -= self._get_server_rtt(request.host) + + chunks, chunk_delays = response.read_chunks() + delays = { + 'headers': headers_delay, + 'data': chunk_delays + } + archived_http_response = httparchive.ArchivedHttpResponse( + response.version, + response.status, + response.reason, + response.getheaders(), + chunks, + delays) + return archived_http_response + except Exception, e: + if retries: + retries -= 1 + logging.warning('Retrying fetch %s: %s', request, e) + continue + logging.critical('Could not fetch %s: %s', request, e) + return None class RecordHttpArchiveFetch(object): """Make real HTTP fetches and save responses in the given HttpArchive.""" - def __init__(self, http_archive, real_dns_lookup, use_deterministic_script): + def __init__(self, http_archive, real_dns_lookup, inject_script, + cache_misses=None): """Initialize RecordHttpArchiveFetch. Args: - http_archve: an instance of a HttpArchive + http_archive: an instance of a HttpArchive real_dns_lookup: a function that resolves a host to an IP. - use_deterministic_script: If True, attempt to inject a script, - when appropriate, to make JavaScript more deterministic. + inject_script: script string to inject in all pages + cache_misses: instance of CacheMissArchive """ self.http_archive = http_archive - self.real_http_fetch = RealHttpFetch(real_dns_lookup) - self.use_deterministic_script = use_deterministic_script + self.real_http_fetch = RealHttpFetch(real_dns_lookup, + http_archive.get_server_rtt) + self.inject_script = inject_script + self.cache_misses = cache_misses - def __call__(self, request, request_headers): + def __call__(self, request): """Fetch the request and return the response. Args: - request: an instance of an ArchivedHttpRequest. - request_headers: a dict of HTTP headers. + request: an ArchivedHttpRequest. 
+ Returns: + an ArchivedHttpResponse """ - response, response_chunks = self.real_http_fetch(request, request_headers) - if response is None: - return None - archived_http_response = httparchive.ArchivedHttpResponse( - response.version, - response.status, - response.reason, - response.getheaders(), - response_chunks) - if self.use_deterministic_script: - try: - archived_http_response.inject_deterministic_script() - except httparchive.InjectionFailedException as err: - logging.error('Failed to inject deterministic script for %s', request) - logging.debug('Request content: %s', err.text) + if self.cache_misses: + self.cache_misses.record_request( + request, is_record_mode=True, is_cache_miss=False) + + # If request is already in the archive, return the archived response. + if request in self.http_archive: + logging.debug('Repeated request found: %s', request) + response = self.http_archive[request] + else: + response = self.real_http_fetch(request) + if response is None: + return None + self.http_archive[request] = response + if self.inject_script: + response = _InjectScripts(response, self.inject_script) logging.debug('Recorded: %s', request) - self.http_archive[request] = archived_http_response - return archived_http_response + return response class ReplayHttpArchiveFetch(object): """Serve responses from the given HttpArchive.""" - def __init__(self, http_archive, use_diff_on_unknown_requests=False): + def __init__(self, http_archive, inject_script, + use_diff_on_unknown_requests=False, cache_misses=None, + use_closest_match=False): """Initialize ReplayHttpArchiveFetch. Args: - http_archve: an instance of a HttpArchive + http_archive: an instance of a HttpArchive + inject_script: script string to inject in all pages use_diff_on_unknown_requests: If True, log unknown requests with a diff to requests that look similar. + cache_misses: Instance of CacheMissArchive. + Callback updates archive on cache misses + use_closest_match: If True, on replay mode, serve the closest match + in the archive instead of giving a 404. """ self.http_archive = http_archive + self.inject_script = inject_script self.use_diff_on_unknown_requests = use_diff_on_unknown_requests + self.cache_misses = cache_misses + self.use_closest_match = use_closest_match - def __call__(self, request, request_headers=None): + def __call__(self, request): """Fetch the request and return the response. Args: request: an instance of an ArchivedHttpRequest. - request_headers: a dict of HTTP headers. 
+ Returns: + Instance of ArchivedHttpResponse (if found) or None """ response = self.http_archive.get(request) + + if self.use_closest_match and not response: + closest_request = self.http_archive.find_closest_request( + request, use_path=True) + if closest_request: + response = self.http_archive.get(closest_request) + if response: + logging.info('Request not found: %s\nUsing closest match: %s', + request, closest_request) + + if self.cache_misses: + self.cache_misses.record_request( + request, is_record_mode=False, is_cache_miss=not response) + if not response: + reason = str(request) if self.use_diff_on_unknown_requests: - reason = self.http_archive.diff(request) or request - else: - reason = request + diff = self.http_archive.diff(request) + if diff: + reason += ( + "\nNearest request diff " + "('-' for archived request, '+' for current request):\n%s" % diff) logging.warning('Could not replay: %s', reason) + else: + response = _InjectScripts(response, self.inject_script) return response + + +class ControllableHttpArchiveFetch(object): + """Controllable fetch function that can swap between record and replay.""" + + def __init__(self, http_archive, real_dns_lookup, + inject_script, use_diff_on_unknown_requests, + use_record_mode, cache_misses, use_closest_match): + """Initialize HttpArchiveFetch. + + Args: + http_archive: an instance of a HttpArchive + real_dns_lookup: a function that resolves a host to an IP. + inject_script: script string to inject in all pages. + use_diff_on_unknown_requests: If True, log unknown requests + with a diff to requests that look similar. + use_record_mode: If True, start in server in record mode. + cache_misses: Instance of CacheMissArchive. + use_closest_match: If True, on replay mode, serve the closest match + in the archive instead of giving a 404. 
+ """ + self.record_fetch = RecordHttpArchiveFetch( + http_archive, real_dns_lookup, inject_script, + cache_misses) + self.replay_fetch = ReplayHttpArchiveFetch( + http_archive, inject_script, use_diff_on_unknown_requests, cache_misses, + use_closest_match) + if use_record_mode: + self.SetRecordMode() + else: + self.SetReplayMode() + + def SetRecordMode(self): + self.fetch = self.record_fetch + self.is_record_mode = True + + def SetReplayMode(self): + self.fetch = self.replay_fetch + self.is_record_mode = False + + def __call__(self, *args, **kwargs): + """Forward calls to Replay/Record fetch functions depending on mode.""" + return self.fetch(*args, **kwargs) diff --git a/wpr/httpproxy.py b/wpr/httpproxy.py old mode 100755 new mode 100644 index 42546db..a375fcd --- a/wpr/httpproxy.py +++ b/wpr/httpproxy.py @@ -16,13 +16,22 @@ import BaseHTTPServer import daemonserver import httparchive -import httpclient # wpr httplib wrapper import logging import os -import socket import SocketServer +import ssl import subprocess import time +import urlparse + + +class HttpProxyError(Exception): + """Module catch-all error.""" + pass + +class HttpProxyServerError(HttpProxyError): + """Raised for errors like 'Address already in use'.""" + pass class HttpArchiveHandler(BaseHTTPServer.BaseHTTPRequestHandler): @@ -52,20 +61,25 @@ class HttpArchiveHandler(BaseHTTPServer.BaseHTTPRequestHandler): logging.error('Request without host header') return None + parsed = urlparse.urlparse(self.path) + query = '?%s' % parsed.query if parsed.query else '' + fragment = '#%s' % parsed.fragment if parsed.fragment else '' + full_path = '%s%s%s' % (parsed.path, query, fragment) + return httparchive.ArchivedHttpRequest( self.command, host, - self.path, + full_path, self.read_request_body(), - self.get_header_dict()) + self.get_header_dict(), + self.server.is_ssl) def send_archived_http_response(self, response): try: # We need to set the server name before we start the response. - headers = dict(response.headers) - use_chunked = 'transfer-encoding' in headers - has_content_length = 'content-length' in headers - self.server_version = headers.get('server', 'WebPageReplay') + is_chunked = response.is_chunked() + has_content_length = response.get_header('content-length') is not None + self.server_version = response.get_header('server', 'WebPageReplay') self.sys_version = '' if response.version == 10: @@ -73,10 +87,15 @@ class HttpArchiveHandler(BaseHTTPServer.BaseHTTPRequestHandler): # If we don't have chunked encoding and there is no content length, # we need to manually compute the content-length. - if not use_chunked and not has_content_length: + if not is_chunked and not has_content_length: content_length = sum(len(c) for c in response.response_data) response.headers.append(('content-length', str(content_length))) + use_delays = (self.server.use_delays and + not self.server.http_archive_fetch.is_record_mode) + if use_delays: + logging.debug('Using delays: %s', response.delays) + time.sleep(response.delays['headers'] / 1000.0) self.send_response(response.status, response.reason) # TODO(mbelshe): This is lame - each write is a packet! 
for header, value in response.headers: @@ -84,16 +103,16 @@ class HttpArchiveHandler(BaseHTTPServer.BaseHTTPRequestHandler): self.send_header(header, value) self.end_headers() - for chunk in response.response_data: - if use_chunked: + for chunk, delay in zip(response.response_data, response.delays['data']): + if use_delays: + time.sleep(delay / 1000.0) + if is_chunked: # Write chunk length (hex) and data (e.g. "A\r\nTESSELATED\r\n"). self.wfile.write('%x\r\n%s\r\n' % (len(chunk), chunk)) else: self.wfile.write(chunk) - if use_chunked and (not response.response_data or - response.response_data[-1]): - # Write last chunk as a zero-length chunk with no data. - self.wfile.write('0\r\n\r\n') + if is_chunked: + self.wfile.write('0\r\n\r\n') # write final, zero-length chunk. self.wfile.flush() # TODO(mbelshe): This connection close doesn't seem to work. @@ -102,9 +121,7 @@ class HttpArchiveHandler(BaseHTTPServer.BaseHTTPRequestHandler): except Exception, e: logging.error('Error sending response for %s/%s: %s', - self.headers['host'], - self.path, - e) + self.headers['host'], self.path, e) def do_POST(self): self.do_GET() @@ -112,17 +129,12 @@ class HttpArchiveHandler(BaseHTTPServer.BaseHTTPRequestHandler): def do_HEAD(self): self.do_GET() - def send_error(self, response_code, message=None): + def send_error(self, status): """Override the default send error with a version that doesn't unnecessarily close the connection. """ - body = "Not Found" - self.send_response(response_code, message) - self.send_header('content-type', 'text/plain') - self.send_header('content-length', str(len(body))) - self.end_headers() - self.wfile.write(body) - self.wfile.flush() + response = httparchive.create_response(status) + self.send_archived_http_response(response) def do_GET(self): start_time = time.time() @@ -130,11 +142,9 @@ class HttpArchiveHandler(BaseHTTPServer.BaseHTTPRequestHandler): if request is None: self.send_error(500) return - response_code = self.server.custom_handlers.handle(request) - if response_code: - self.send_error(response_code) - return - response = self.server.http_archive_fetch(request, self.get_header_dict()) + response = self.server.custom_handlers.handle(request) + if not response: + response = self.server.http_archive_fetch(request) if response: self.send_archived_http_response(response) request_time_ms = (time.time() - start_time) * 1000.0; @@ -146,21 +156,29 @@ class HttpArchiveHandler(BaseHTTPServer.BaseHTTPRequestHandler): class HttpProxyServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer, daemonserver.DaemonServer): + HANDLER = HttpArchiveHandler + + # Increase the request queue size. The default value, 5, is set in + # SocketServer.TCPServer (the parent of BaseHTTPServer.HTTPServer). + # Since we're intercepting many domains through this single server, + # it is quite possible to get more than 5 concurrent requests. + request_queue_size = 128 + def __init__(self, http_archive_fetch, custom_handlers, - host='localhost', port=80): + host='localhost', port=80, use_delays=False, + is_ssl=False): + try: + BaseHTTPServer.HTTPServer.__init__(self, (host, port), self.HANDLER) + except Exception, e: + raise HttpProxyServerError('Could not start HTTPServer on port %d: %s' % + (port, e)) self.http_archive_fetch = http_archive_fetch self.custom_handlers = custom_handlers + self.use_delays = use_delays + self.is_ssl = is_ssl - # Increase the listen queue size. The default, 5, is set in - # SocketServer.TCPServer (the parent of BaseHTTPServer.HTTPServer). 
- # Since we're intercepting many domains through this single server, - # it is quite possible to get more than 5 concurrent connection requests. - self.request_queue_size = 128 - - try: - BaseHTTPServer.HTTPServer.__init__(self, (host, port), HttpArchiveHandler) - except Exception, e: - logging.critical('Could not start HTTPServer on port %d: %s', port, e) + protocol = 'HTTPS' if self.is_ssl else 'HTTP' + logging.info('Started %s server on %s...', protocol, self.server_address) def cleanup(self): try: @@ -168,3 +186,16 @@ class HttpProxyServer(SocketServer.ThreadingMixIn, except KeyboardInterrupt, e: pass logging.info('Stopped HTTP server') + + +class HttpsProxyServer(HttpProxyServer): + """SSL server.""" + + def __init__(self, http_archive_fetch, custom_handlers, certfile, + host='localhost', port=443, use_delays=False): + HttpProxyServer.__init__( + self, http_archive_fetch, custom_handlers, host, port, + use_delays, is_ssl=True) + self.socket = ssl.wrap_socket( + self.socket, certfile=certfile, server_side=True) + # Ancestor class, deamonserver, calls serve_forever() during its __init__. diff --git a/wpr/httpzlib.py b/wpr/httpzlib.py old mode 100755 new mode 100644 diff --git a/wpr/mock-archive.txt b/wpr/mock-archive.txt new file mode 100644 index 0000000..a90bb03 --- /dev/null +++ b/wpr/mock-archive.txt @@ -0,0 +1,10 @@ +GET%www.zappos.com%/%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.zappos.com')] +GET%www.zappos.com%/css/print.20110525145237.css%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.zappos.com')] +GET%www.zappos.com%/favicon.ico%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.zappos.com')] +GET%www.zappos.com%/hydra/hydra.p.20110607.js%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.zappos.com')] +GET%www.zappos.com%/imgs/shadebg.20110525145241.png%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.zappos.com')] +GET%www.msn.com%/%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.msn.com')] +GET%www.msn.com%/?euid=&userGroup=W:default&PM=z:1%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.msn.com'), ('x-requested-with', 'XMLHttpRequest')] +GET%www.msn.com%/?euid=342%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.msn.com'), ('x-requested-with', 'XMLHttpRequest')] +GET%www.amazon.com%/%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.amazon.com')] +GET%www.google.com%/%%[('accept-encoding', 'gzip,deflate'), ('host', 'www.google.com')] diff --git a/wpr/mockhttprequest.py b/wpr/mockhttprequest.py new file mode 100644 index 0000000..ac5df99 --- /dev/null +++ b/wpr/mockhttprequest.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# Copyright 2010 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Mock instance of ArchivedHttpRequest used for testing.""" + + +class ArchivedHttpRequest(object): + """Mock instance of ArchivedHttpRequest in HttpArchive.""" + + def __init__(self, command, host, path, request_body, headers): + """Initialize an ArchivedHttpRequest. + + Args: + command: a string (e.g. 
'GET' or 'POST'). + host: a host name (e.g. 'www.google.com'). + path: a request path (e.g. '/search?q=dogs'). + request_body: a request body string for a POST or None. + headers: [(header1, value1), ...] list of tuples + """ + self.command = command + self.host = host + self.path = path + self.request_body = request_body + self.headers = headers + self.trimmed_headers = headers + + def __str__(self): + return '%s %s%s %s' % (self.command, self.host, self.path, + self.trimmed_headers) + + def __repr__(self): + return repr((self.command, self.host, self.path, self.request_body, + self.trimmed_headers)) + + def __hash__(self): + """Return a integer hash to use for hashed collections including dict.""" + return hash(repr(self)) + + def __eq__(self, other): + """Define the __eq__ method to match the hash behavior.""" + return repr(self) == repr(other) + + def matches(self, command=None, host=None, path=None): + """Returns true iff the request matches all parameters.""" + return ((command is None or command == self.command) and + (host is None or host == self.host) and + (path is None or path == self.path)) diff --git a/wpr/perftracker/__init__.py b/wpr/perftracker/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/wpr/perftracker/app/appengine_config.py b/wpr/perftracker/app/appengine_config.py index e6c00bf..1e21316 100644 --- a/wpr/perftracker/app/appengine_config.py +++ b/wpr/perftracker/app/appengine_config.py @@ -1,3 +1,17 @@ +#!/usr/bin/env python +# Copyright 2012 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. def webapp_add_wsgi_middleware(app): from google.appengine.ext.appstats import recording diff --git a/wpr/perftracker/extension/background.html b/wpr/perftracker/extension/background.html index 171a66c..595fe6c 100644 --- a/wpr/perftracker/extension/background.html +++ b/wpr/perftracker/extension/background.html @@ -410,6 +410,8 @@ function Benchmark(url, setIds, callbackWhenFinished) { setIds_[LoadType.cold]); chrome.benchmarking.clearCache(); + chrome.benchmarking.clearHostResolverCache(); + chrome.benchmarking.clearPredictorCache(); chrome.benchmarking.closeConnections(); me_.asyncClearCookies(); diff --git a/wpr/perftracker/runner.py b/wpr/perftracker/runner.py index ce12f3c..2da4730 100755 --- a/wpr/perftracker/runner.py +++ b/wpr/perftracker/runner.py @@ -15,6 +15,11 @@ description = """ This is a script for running automated network tests of chrome. + + There is an optional -e flag that instead runs an automated + web-page-replay test. It runs WPR record mode on the set of URLs specified + in the config file, then runs replay mode on the same set of URLs and + records any cache misses to . """ import sys @@ -129,6 +134,7 @@ def _XvfbPidFilename(slave_build_name): """ return os.path.join(tempfile.gettempdir(), 'xvfb-%s.pid' % slave_build_name) + def StartVirtualX(slave_build_name, build_dir): """Start a virtual X server and set the DISPLAY environment variable so sub processes will use the virtual X server. Also start icewm. 
This only works @@ -224,7 +230,11 @@ def GetVersion(): class TestInstance: def __init__(self, network, log_level, log_file, record, - diff_unknown_requests, screenshot_dir): + diff_unknown_requests, screenshot_dir, cache_miss_file=None, + use_deterministic_script=False, + use_chrome_deterministic_js=True, + use_closest_match=False, + use_server_delay=False): self.network = network self.log_level = log_level self.log_file = log_file @@ -233,6 +243,11 @@ class TestInstance: self.spdy_proxy_process = None self.diff_unknown_requests = diff_unknown_requests self.screenshot_dir = screenshot_dir + self.cache_miss_file = cache_miss_file + self.use_deterministic_script = use_deterministic_script + self.use_chrome_deterministic_js = use_chrome_deterministic_js + self.use_closest_match = use_closest_match + self.use_server_delay = use_server_delay def GenerateConfigFile(self, notes=''): # The PerfTracker extension requires this name in order to kick off. @@ -289,21 +304,28 @@ setTimeout(function() { init_cwnd = 10 protocol = self.network['protocol'] if 'spdy' in protocol: - port = BACKEND_SERVER_PORT - init_cwnd = 32 + port = BACKEND_SERVER_PORT + init_cwnd = 32 if protocol == 'http-base': - init_cwnd = 3 # See RFC3390 + init_cwnd = 3 # See RFC3390 cmdline = [ REPLAY_PATH, '--no-dns_forwarding', - '--no-deterministic_script', '--port', str(port), '--shaping_port', str(SERVER_PORT), - '--init_cwnd', str(init_cwnd), '--log_level', self.log_level, + '--init_cwnd', str(init_cwnd), ] + if self.cache_miss_file: + cmdline += ['-e', self.cache_miss_file] + if self.use_closest_match: + cmdline += ['--use_closest_match'] + if self.use_server_delay: + cmdline += ['--use_server_delay'] + if not self.use_deterministic_script: + cmdline += ['--inject_scripts=""'] if self.log_file: cmdline += ['--log_file', self.log_file] if self.network['bandwidth_kbps']['down']: @@ -314,15 +336,15 @@ setTimeout(function() { cmdline += ['-m', str(self.network['round_trip_time_ms'])] if self.network['packet_loss_percent']: cmdline += ['-p', str(self.network['packet_loss_percent'] / 100.0)] - if self.diff_unknown_requests: - cmdline.append('--diff_unknown_requests') + if not self.diff_unknown_requests: + cmdline.append('--no-diff_unknown_requests') if self.screenshot_dir: cmdline += ['-I', self.screenshot_dir] if self.record: cmdline.append('-r') cmdline.append(runner_cfg.replay_data_archive) - logging.debug('Starting Web-Page-Replay: %s', ' '.join(cmdline)) + logging.info('Starting Web-Page-Replay: %s', ' '.join(cmdline)) self.proxy_process = subprocess.Popen(cmdline) def StopProxy(self): @@ -404,16 +426,10 @@ setTimeout(function() { runner_cfg.chrome_path, '--activate-on-launch', '--disable-background-networking', - # Stop the translate bar from appearing at the top of the page. When # it's there, the screenshots are shorter than they should be. '--disable-translate', - # TODO(tonyg): These are disabled to reduce noise. It would be nice to - # make the model realistic and stable enough to enable them. 
- '--disable-preconnect', - '--dns-prefetch-disable', - '--enable-benchmarking', '--enable-logging', '--enable-experimental-extension-apis', @@ -423,11 +439,14 @@ setTimeout(function() { '--load-extension=' + PERFTRACKER_EXTENSION_PATH, '--log-level=0', '--no-first-run', - '--no-js-randomness', '--no-proxy-server', '--start-maximized', '--user-data-dir=' + profile_dir, ] + if self.use_chrome_deterministic_js: + cmdline += ['--no-js-randomness'] + if self.cache_miss_file: + cmdline += ['--no-sandbox'] spdy_mode = None if self.network['protocol'] == 'spdy': @@ -441,7 +460,7 @@ setTimeout(function() { cmdline.extend(chrome_cmdline.split(' ')) cmdline.append(start_file_url) - logging.debug('Starting Chrome: %s', ' '.join(cmdline)) + logging.info('Starting Chrome: %s', ' '.join(cmdline)) chrome = subprocess.Popen(cmdline, preexec_fn=switch_away_from_root) returncode = chrome.wait() if returncode: @@ -491,7 +510,7 @@ def ConfigureLogging(log_level_name, log_file_name): logging.getLogger().addHandler(fh) -def main(options): +def main(options, cache_miss_file): # When in record mode, override most of the configuration. if options.record: runner_cfg.replay_data_archive = options.record @@ -513,7 +532,10 @@ def main(options): logging.debug("Running network configuration: %s", network) test = TestInstance( network, options.log_level, options.log_file, options.record, - options.diff_unknown_requests, options.screenshot_dir) + options.diff_unknown_requests, options.screenshot_dir, + cache_miss_file, options.use_deterministic_script, + options.use_chrome_deterministic_js, options.use_closest_match, + options.use_server_delay) test.RunTest(options.notes, options.chrome_cmdline) if not options.infinite or options.record: break @@ -547,10 +569,10 @@ if __name__ == '__main__': action='store', type='string', help='Log file to use in addition to writting logs to stderr.') - option_parser.add_option('-r', '--record', default='', - action='store', - type='string', - help=('Record URLs in runner_cfg to this file.')) + option_parser.add_option('-r', '--record', default=False, + action='store_true', + dest='do_record', + help=('Record URLs to file specified by runner_cfg.')) option_parser.add_option('-i', '--infinite', default=False, action='store_true', help='Loop infinitely, repeating the test.') @@ -566,14 +588,43 @@ if __name__ == '__main__': action='store', type='string', help='Username for logging into appengine.') - option_parser.add_option('-D', '--diff_unknown_requests', default=False, - action='store_true', - help='During replay, show a unified diff of any unknown requests against ' + option_parser.add_option('-D', '--no-diff_unknown_requests', default=True, + action='store_false', + dest='diff_unknown_requests', + help='During replay, do not show a diff of any unknown requests against ' 'their nearest match in the archive.') option_parser.add_option('-I', '--screenshot_dir', default=None, action='store', type='string', help='Save PNG images of the loaded page in the given directory.') + option_parser.add_option('-d', '--deterministic_script', default=False, + action='store_true', + dest='use_deterministic_script', + help='During a record, inject JavaScript to make sources of ' + 'entropy such as Date() and Math.random() deterministic. 
CAUTION: ' + 'Without this option many web pages will not replay properly.') + option_parser.add_option('-j', '--no_chrome_deterministic_js', default=True, + action='store_false', + dest='use_chrome_deterministic_js', + help='Enable Chrome\'s deterministic implementations of javascript.' + 'This makes sources of entropy such as Date() and Math.random()' + 'deterministic.') + option_parser.add_option('-e', '--cache_miss_file', default=None, + action='store', + dest='cache_miss_file', + type='string', + help='Archive file to record cache misses in replay mode.') + option_parser.add_option('-C', '--use_closest_match', default=False, + action='store_true', + dest='use_closest_match', + help='During replay, if a request is not found, serve the closest match' + 'in the archive instead of giving a 404.') + option_parser.add_option('-U', '--use_server_delay', default=False, + action='store_true', + dest='use_server_delay', + help='During replay, simulate server delay by delaying response time to' + 'requests.') + options, args = option_parser.parse_args() @@ -593,4 +644,14 @@ if __name__ == '__main__': else: options.login_url = '' - sys.exit(main(options)) + # run the recording round, if specified + if options.do_record and options.cache_miss_file: + logging.debug("Running on record mode") + options.record = runner_cfg.replay_data_archive + main(options, options.cache_miss_file) + options.do_record = False + + options.record = None + # run the replay round + logging.debug("Running on replay mode") + sys.exit(main(options, options.cache_miss_file)) diff --git a/wpr/persistentmixin.py b/wpr/persistentmixin.py old mode 100755 new mode 100644 diff --git a/wpr/platformsettings.py b/wpr/platformsettings.py old mode 100755 new mode 100644 index b1e34c8..9d023d3 --- a/wpr/platformsettings.py +++ b/wpr/platformsettings.py @@ -21,7 +21,9 @@ import platform import re import socket import subprocess +import sys import tempfile +import time class PlatformSettingsError(Exception): @@ -39,6 +41,22 @@ class DnsUpdateError(PlatformSettingsError): pass +class NotAdministratorError(PlatformSettingsError): + """Raised when not running as administrator.""" + pass + + +class CalledProcessError(PlatformSettingsError): + """Raised when a _check_output() process returns a non-zero exit status.""" + def __init__(self, returncode, cmd): + self.returncode = returncode + self.cmd = cmd + + def __str__(self): + return 'Command "%s" returned non-zero exit status %d' % ( + ' '.join(self.cmd), self.returncode) + + def _check_output(*args): """Run Popen(*args) and return its output as a byte string. @@ -49,7 +67,7 @@ def _check_output(*args): Args: *args: sequence of program arguments Raises: - subprocess.CalledProcessError if the program returns non-zero exit status. + CalledProcessError if the program returns non-zero exit status. Returns: output as a byte string. """ @@ -60,31 +78,34 @@ def _check_output(*args): output = process.communicate()[0] retcode = process.poll() if retcode: - raise subprocess.CalledProcessError(retcode, command_args, output=output) + raise CalledProcessError(retcode, command_args) return output class PlatformSettings(object): - _IPFW_BIN = None - _IPFW_QUEUE_SLOTS = 100 + _CERT_FILE = 'wpr_cert.pem' # Some platforms do not shape traffic with the loopback address. 
_USE_REAL_IP_FOR_TRAFFIC_SHAPING = False def __init__(self): self.original_primary_dns = None + self.original_cwnd = None # original TCP congestion window def get_primary_dns(self): - raise NotImplementedError() + raise NotImplementedError + + def _set_primary_dns(self): + raise NotImplementedError def get_original_primary_dns(self): - if not self.original_primary_dns: + if self.original_primary_dns is None: self.original_primary_dns = self.get_primary_dns() + logging.info('Saved original system DNS (%s)', self.original_primary_dns) return self.original_primary_dns def set_primary_dns(self, dns): - if not self.original_primary_dns: - self.original_primary_dns = self.get_primary_dns() + self.get_original_primary_dns() self._set_primary_dns(dns) if self.get_primary_dns() == dns: logging.info('Changed system DNS to %s', dns) @@ -92,30 +113,40 @@ class PlatformSettings(object): raise self._get_dns_update_error() def restore_primary_dns(self): - if not self.original_primary_dns: - raise DnsUpdateError('Cannot restore because never set.') - self.set_primary_dns(self.original_primary_dns) - self.original_primary_dns = None - - def ipfw(self, *args): - if self._IPFW_BIN: - ipfw_args = [self._IPFW_BIN] + [str(a) for a in args] - logging.debug(' '.join(ipfw_args)) - subprocess.check_call(ipfw_args) - else: - raise NotImplementedError() - - def is_cwnd_available(self): - return False - - def set_cwnd(self, args): - logging.error("Platform does not support setting cwnd.") + if self.original_primary_dns is not None: + self.set_primary_dns(self.original_primary_dns) + self.original_primary_dns = None def get_cwnd(self): - logging.error("Platform does not support getting cwnd.") + return None - def get_ipfw_queue_slots(self): - return self._IPFW_QUEUE_SLOTS + def _set_cwnd(self, args): + pass + + def get_original_cwnd(self): + if not self.original_cwnd: + self.original_cwnd = self.get_cwnd() + return self.original_cwnd + + def set_cwnd(self, cwnd): + self.get_original_cwnd() + self._set_cwnd(cwnd) + if self.get_cwnd() == cwnd: + logging.info("Changed cwnd to %s", cwnd) + else: + logging.error("Unable to update cwnd to %s", cwnd) + + def restore_cwnd(self): + if self.original_cwnd is not None: + self.set_cwnd(self.original_cwnd) + self.original_cwnd = None + + def _ipfw_bin(self): + raise NotImplementedError + + def ipfw(self, *args): + ipfw_args = [self._ipfw_bin()] + [str(a) for a in args] + return _check_output(*ipfw_args) def get_server_ip_address(self, is_server_mode=False): """Returns the IP address to use for dnsproxy, httpproxy, and ipfw.""" @@ -135,14 +166,54 @@ class PlatformSettings(object): def unconfigure_loopback(self): pass + def get_system_logging_handler(self): + """Return a handler for the logging module (optional).""" + return None + + def ping(self, hostname): + """Pings the hostname by calling the OS system ping command. + Also stores the result internally. + + Args: + hostname: hostname of the server to be pinged + Returns: + round trip time to the server in seconds, or 0 if unable to calculate RTT + """ + raise NotImplementedError + + def rerun_as_administrator(self): + """If needed, rerun the program with administrative privileges. + + Raises NotAdministratorError if unable to rerun. 
+ """ + pass + + def get_certfile_name(self): + """Get the file name for a temporary self-signed certificate.""" + raise NotImplementedError + + def create_certfile(self, certfile): + """Create a certfile for serving SSL traffic.""" + raise NotImplementedError + + def timer(self): + """Return the current time in seconds as a floating point number.""" + return time.time() + class PosixPlatformSettings(PlatformSettings): - _IPFW_BIN = 'ipfw' + PING_PATTERN = r'rtt min/avg/max/mdev = \d+\.\d+/(\d+\.\d+)/\d+\.\d+/\d+\.\d+' + PING_CMD = ('ping', '-c', '3', '-i', '0.2', '-W', '1') + # For OsX Lion non-root: + PING_RESTRICTED_CMD = ('ping', '-c', '1', '-i', '1', '-W', '1') def _get_dns_update_error(self): return DnsUpdateError('Did you run under sudo?') def _sysctl(self, *args): + sysctl = '/usr/sbin/sysctl' + if not os.path.exists(sysctl): + sysctl = '/sbin/sysctl' sysctl = subprocess.Popen( ['sysctl'] + [str(a) for a in args], stdin=subprocess.PIPE, stdout=subprocess.PIPE) @@ -150,7 +221,11 @@ class PosixPlatformSettings(PlatformSettings): return sysctl.returncode, stdout def has_sysctl(self, name): - return self._sysctl(name)[0] == 0 + if not hasattr(self, 'has_sysctl_cache'): + self.has_sysctl_cache = {} + if name not in self.has_sysctl_cache: + self.has_sysctl_cache[name] = self._sysctl(name)[0] == 0 + return self.has_sysctl_cache[name] def set_sysctl(self, name, value): rv = self._sysctl('%s=%s' % (name, value))[0] @@ -165,17 +240,97 @@ class PosixPlatformSettings(PlatformSettings): logging.error("Unable to get sysctl %s: %s", name, rv) return None + def _check_output(self, *args): + """Allow tests to override this.""" + return _check_output(*args) + + def _ping(self, hostname): + """Return ping output or None if ping fails. + + Initially pings 'localhost' to test for ping command that works. + If the tests fails, subsequent calls will return None without calling ping. + + Args: + hostname: host to ping + Returns: + ping stdout string, or None if ping unavailable + Raises: + CalledProcessError if ping returns non-zero exit + """ + if not hasattr(self, 'ping_cmd'): + test_host = 'localhost' + for self.ping_cmd in (self.PING_CMD, self.PING_RESTRICTED_CMD): + try: + if self._ping(test_host): + break + except (CalledProcessError, OSError) as e: + last_ping_error = e + else: + logging.critical('Ping configuration failed: %s', last_ping_error) + self.ping_cmd = None + if self.ping_cmd: + cmd = list(self.ping_cmd) + [hostname] + return self._check_output(*cmd) + return None + + def ping(self, hostname): + """Pings the hostname by calling the OS system ping command. + + Args: + hostname: hostname of the server to be pinged + Returns: + round trip time to the server in milliseconds, or 0 if unavailable + """ + rtt = 0 + output = None + try: + output = self._ping(hostname) + except CalledProcessError as e: + logging.critical('Ping failed: %s', e) + if output: + match = re.search(self.PING_PATTERN, output) + if match: + rtt = float(match.groups()[0]) + else: + logging.warning('Unable to ping %s: %s', hostname, output) + return rtt + + def rerun_as_administrator(self): + """If needed, rerun the program with administrative privileges. + + Raises NotAdministratorError if unable to rerun. 
+ """ + if os.geteuid() != 0: + logging.warn("Rerunning with sudo: %s", sys.argv) + os.execv('/usr/bin/sudo', ['--'] + sys.argv) + + def get_certfile_name(self): + """Get the file name for a temporary self-signed certificate.""" + return os.path.join(tempfile.gettempdir(), self._CERT_FILE) + + def create_certfile(self, certfile): + """Create a certfile for serving SSL traffic.""" + if not os.path.exists(certfile): + _check_output( + '/usr/bin/openssl', 'req', '-batch', '-new', '-x509', '-days', '365', + '-nodes', '-out', certfile, '-keyout', certfile) + + def _ipfw_bin(self): + for ipfw in ['/usr/local/sbin/ipfw', '/sbin/ipfw']: + if os.path.exists(ipfw): + return ipfw + raise PlatformSettingsError("ipfw not found.") class OsxPlatformSettings(PosixPlatformSettings): LOCAL_SLOWSTART_MIB_NAME = 'net.inet.tcp.local_slowstart_flightsize' def _scutil(self, cmd): scutil = subprocess.Popen( - ['scutil'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) + ['/usr/sbin/scutil'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) return scutil.communicate(cmd)[0] def _ifconfig(self, *args): - return _check_output('ifconfig', *args) + return _check_output('/sbin/ifconfig', *args) def set_sysctl(self, name, value): rv = self._sysctl('-w', '%s=%s' % (name, value))[0] @@ -194,7 +349,7 @@ class OsxPlatformSettings(PosixPlatformSettings): key_value = line.split(' : ') if key_value[0] == ' PrimaryService': return 'State:/Network/Service/%s/DNS' % key_value[1] - raise self._get_dns_update_error() + raise DnsReadError('Unable to find DNS service key: %s', output) def get_primary_dns(self): # { @@ -205,9 +360,14 @@ class OsxPlatformSettings(PosixPlatformSettings): # DomainName : apple.co.uk # } output = self._scutil('show %s' % self._get_dns_service_key()) - primary_line = output.split('\n')[2] - line_parts = primary_line.split(' ') - return line_parts[-1] + match = re.search( + br'ServerAddresses\s+:\s+\s+{\s+0\s+:\s+((\d{1,3}\.){3}\d{1,3})', + output) + if match: + return match.group(1) + else: + raise DnsReadError('Unable to find primary DNS server: %s', output) + def _set_primary_dns(self, dns): command = '\n'.join([ @@ -217,22 +377,18 @@ class OsxPlatformSettings(PosixPlatformSettings): ]) self._scutil(command) + def get_cwnd(self): + return int(self.get_sysctl(self.LOCAL_SLOWSTART_MIB_NAME)) + + def _set_cwnd(self, size): + self.set_sysctl(self.LOCAL_SLOWSTART_MIB_NAME, size) + def get_loopback_mtu(self): config = self._ifconfig('lo0') match = re.search(r'\smtu\s+(\d+)', config) if match: return int(match.group(1)) - else: - return None - - def is_cwnd_available(self): - return True - - def set_cwnd(self, size): - self.set_sysctl(self.LOCAL_SLOWSTART_MIB_NAME, size) - - def get_cwnd(self): - return int(self.get_sysctl(self.LOCAL_SLOWSTART_MIB_NAME)) + return None def configure_loopback(self): """Configure loopback to use reasonably sized frames. @@ -240,19 +396,20 @@ class OsxPlatformSettings(PosixPlatformSettings): OS X uses jumbo frames by default (16KB). 
""" TARGET_LOOPBACK_MTU = 1500 - loopback_mtu = self.get_loopback_mtu() - if loopback_mtu and loopback_mtu != TARGET_LOOPBACK_MTU: - self.saved_loopback_mtu = loopback_mtu + self.original_loopback_mtu = self.get_loopback_mtu() + if self.original_loopback_mtu == TARGET_LOOPBACK_MTU: + self.original_loopback_mtu = None + if self.original_loopback_mtu is not None: self._ifconfig('lo0', 'mtu', TARGET_LOOPBACK_MTU) logging.debug('Set loopback MTU to %d (was %d)', - TARGET_LOOPBACK_MTU, loopback_mtu) + TARGET_LOOPBACK_MTU, self.original_loopback_mtu) else: logging.error('Unable to read loopback mtu. Setting left unchanged.') def unconfigure_loopback(self): - if hasattr(self, 'saved_loopback_mtu') and self.saved_loopback_mtu: - self._ifconfig('lo0', 'mtu', self.saved_loopback_mtu) - logging.debug('Restore loopback MTU to %d', self.saved_loopback_mtu) + if self.original_loopback_mtu is not None: + self._ifconfig('lo0', 'mtu', self.original_loopback_mtu) + logging.debug('Restore loopback MTU to %d', self.original_loopback_mtu) class LinuxPlatformSettings(PosixPlatformSettings): @@ -280,7 +437,6 @@ class LinuxPlatformSettings(PosixPlatformSettings): TCP_INIT_CWND = 'net.ipv4.tcp_init_cwnd' TCP_BASE_MSS = 'net.ipv4.tcp_base_mss' TCP_MTU_PROBING = 'net.ipv4.tcp_mtu_probing' - _IPFW_QUEUE_SLOTS = 500 def get_primary_dns(self): try: @@ -294,7 +450,12 @@ class LinuxPlatformSettings(PosixPlatformSettings): def _set_primary_dns(self, dns): """Replace the first nameserver entry with the one given.""" - self._write_resolve_conf(dns) + try: + self._write_resolve_conf(dns) + except OSError, e: + if 'Permission denied' in e: + raise self._get_dns_update_error() + raise def _write_resolve_conf(self, dns): is_first_nameserver_replaced = False @@ -306,17 +467,19 @@ class LinuxPlatformSettings(PosixPlatformSettings): else: print line, if not is_first_nameserver_replaced: - raise DnsUpdateError('Could not find a suitable namserver entry in %s' % + raise DnsUpdateError('Could not find a suitable nameserver entry in %s' % self.RESOLV_CONF) - def is_cwnd_available(self): - return self.has_sysctl(self.TCP_INIT_CWND) - - def set_cwnd(self, args): - self.set_sysctl(self.TCP_INIT_CWND, str(args)) - def get_cwnd(self): - return self.get_sysctl(self.TCP_INIT_CWND) + if self.has_sysctl(self.TCP_INIT_CWND): + return self.get_sysctl(self.TCP_INIT_CWND) + else: + return None + + def _set_cwnd(self, args): + if self.has_sysctl(self.TCP_INIT_CWND): + self.set_sysctl(self.TCP_INIT_CWND, str(args)) + def configure_loopback(self): """ @@ -351,27 +514,26 @@ class WindowsPlatformSettings(PlatformSettings): """Return DNS information: Example output: + Configuration for interface "Local Area Connection 3" + DNS servers configured through DHCP: None + Register with which suffix: Primary only - Configuration for interface "Local Area Connection 3" - DNS servers configured through DHCP: None - Register with which suffix: Primary only - - Configuration for interface "Wireless Network Connection 2" - DNS servers configured through DHCP: 192.168.1.1 - Register with which suffix: Primary only + Configuration for interface "Wireless Network Connection 2" + DNS servers configured through DHCP: 192.168.1.1 + Register with which suffix: Primary only """ return _check_output('netsh', 'interface', 'ip', 'show', 'dns') - def _netsh_get_interface_names(self): - return re.findall(r'"(.+?)"', self._netsh_show_dns()) - def get_primary_dns(self): match = re.search(r':\s+(\d+\.\d+\.\d+\.\d+)', self._netsh_show_dns()) return match and match.group(1) or None 
@@ -351,27 +514,26 @@ class WindowsPlatformSettings(PlatformSettings):
     """Return DNS information:

     Example output:
+      Configuration for interface "Local Area Connection 3"
+      DNS servers configured through DHCP:  None
+      Register with which suffix:  Primary only

-        Configuration for interface "Local Area Connection 3"
-        DNS servers configured through DHCP: None
-        Register with which suffix: Primary only
-
-        Configuration for interface "Wireless Network Connection 2"
-        DNS servers configured through DHCP: 192.168.1.1
-        Register with which suffix: Primary only
+      Configuration for interface "Wireless Network Connection 2"
+      DNS servers configured through DHCP:  192.168.1.1
+      Register with which suffix:  Primary only
     """
     return _check_output('netsh', 'interface', 'ip', 'show', 'dns')

-  def _netsh_get_interface_names(self):
-    return re.findall(r'"(.+?)"', self._netsh_show_dns())
-
   def get_primary_dns(self):
     match = re.search(r':\s+(\d+\.\d+\.\d+\.\d+)', self._netsh_show_dns())
     return match and match.group(1) or None

   def _set_primary_dns(self, dns):
-    vbs = """Set objWMIService = GetObject("winmgmts:{impersonationLevel=impersonate}!\\\\.\\root\\cimv2")
-Set colNetCards = objWMIService.ExecQuery("Select * From Win32_NetworkAdapterConfiguration Where IPEnabled = True")
+    vbs = """
+Set objWMIService = GetObject( _
+    "winmgmts:{impersonationLevel=impersonate}!\\\\.\\root\\cimv2")
+Set colNetCards = objWMIService.ExecQuery( _
+    "Select * From Win32_NetworkAdapterConfiguration Where IPEnabled = True")
 For Each objNetCard in colNetCards
   arrDNSServers = Array("%s")
   objNetCard.SetDNSServerSearchOrder(arrDNSServers)
@@ -394,14 +556,16 @@ Next

   def get_mac_address(self, ip):
     """Return the MAC address for the given ip."""
+    ip_re = re.compile(r'^\s*IP(?:v4)? Address[ .]+:\s+([0-9.]+)')
     for line in self._ipconfig('/all').splitlines():
       if line[:1].isalnum():
         current_ip = None
         current_mac = None
       elif ':' in line:
         line = line.strip()
-        if line.startswith('IP Address'):
-          current_ip = line.split(':', 1)[1].lstrip()
+        ip_match = ip_re.match(line)
+        if ip_match:
+          current_ip = ip_match.group(1)
         elif line.startswith('Physical Address'):
           current_mac = line.split(':', 1)[1].lstrip()
         if current_ip == ip and current_mac:
@@ -409,7 +573,6 @@ Next
     return None

   def configure_loopback(self):
-    # TODO(slamm): use/set ip address that is compat with replay.py
     self.ip = self.get_server_ip_address()
     self.mac_address = self.get_mac_address(self.ip)
     if self.mac_address:
@@ -424,8 +587,56 @@ Next
     self._arp('-d', self.ip)
     self._route('delete', self.ip, self.ip, 'mask', '255.255.255.255')

+  def get_system_logging_handler(self):
+    """Return a handler for the logging module (optional).
+
+    For Windows, output can be viewed with DebugView.
+    http://technet.microsoft.com/en-us/sysinternals/bb896647.aspx
+    """
+    import ctypes
+    output_debug_string = ctypes.windll.kernel32.OutputDebugStringA
+    output_debug_string.argtypes = [ctypes.c_char_p]
+    class DebugViewHandler(logging.Handler):
+      def emit(self, record):
+        output_debug_string("[wpr] " + self.format(record))
+    return DebugViewHandler()
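For orientation, a hedged sketch of how this handler gets attached; configure_logging() in replay.py (later in this patch) does the equivalent through the same get_system_logging_handler() hook, which presumably returns None on non-Windows platforms:

    import logging
    import platformsettings

    platform_settings = platformsettings.get_platform_settings()
    system_handler = platform_settings.get_system_logging_handler()
    if system_handler:  # only the Windows settings class returns one
      logging.getLogger().addHandler(system_handler)
    logging.info('This record also shows up in DebugView on Windows.')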
+ """ + return time.clock() + class WindowsXpPlatformSettings(WindowsPlatformSettings): - _IPFW_BIN = r'third_party\ipfw_win32\ipfw.exe' + def _ipfw_bin(self): + return r'third_party\ipfw_win32\ipfw.exe' def _new_platform_settings(): diff --git a/wpr/platformsettings_test.py b/wpr/platformsettings_test.py new file mode 100755 index 0000000..9142a23 --- /dev/null +++ b/wpr/platformsettings_test.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python +# Copyright 2011 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for platformsettings. + +Usage: +$ ./platformsettings_test.py +""" + +import unittest + +import platformsettings + +WINDOWS_7_IP = '172.11.25.170' +WINDOWS_7_MAC = '00-1A-44-DA-88-C0' +WINDOWS_7_IPCONFIG = """ +Windows IP Configuration + + Host Name . . . . . . . . . . . . : THEHOST1-W + Primary Dns Suffix . . . . . . . : something.example.com + Node Type . . . . . . . . . . . . : Hybrid + IP Routing Enabled. . . . . . . . : No + WINS Proxy Enabled. . . . . . . . : No + DNS Suffix Search List. . . . . . : example.com + another.example.com + +Ethernet adapter Local Area Connection: + + Connection-specific DNS Suffix . : somethingexample.com + Description . . . . . . . . . . . : Int PRO/1000 MT Network Connection + Physical Address. . . . . . . . . : %(mac_addr)s + DHCP Enabled. . . . . . . . . . . : Yes + Autoconfiguration Enabled . . . . : Yes + IPv6 Address. . . . . . . . . . . : 1234:0:1000:1200:839f:d256:3a6c:210(Preferred) + Temporary IPv6 Address. . . . . . : 2143:0:2100:1800:38f9:2d65:a3c6:120(Preferred) + Link-local IPv6 Address . . . . . : abcd::1234:1a33:b2cc:238%%18(Preferred) + IPv4 Address. . . . . . . . . . . : %(ip_addr)s(Preferred) + Subnet Mask . . . . . . . . . . . : 255.255.248.0 + Lease Obtained. . . . . . . . . . : Thursday, April 28, 2011 9:40:22 PM + Lease Expires . . . . . . . . . . : Tuesday, May 10, 2011 12:15:48 PM + Default Gateway . . . . . . . . . : abcd::2:37ee:ef70:56%%18 + 172.11.25.254 + DHCP Server . . . . . . . . . . . : 172.11.22.33 + DNS Servers . . . . . . . . . . . : 8.8.4.4 + NetBIOS over Tcpip. . . . . . . . : Enabled +""" % { 'ip_addr': WINDOWS_7_IP, 'mac_addr': WINDOWS_7_MAC } + +WINDOWS_XP_IP = '172.1.2.3' +WINDOWS_XP_MAC = '00-34-B8-1F-FA-70' +WINDOWS_XP_IPCONFIG = """ +Windows IP Configuration + + Host Name . . . . . . . . . . . . : HOSTY-0 + Primary Dns Suffix . . . . . . . : + Node Type . . . . . . . . . . . . : Unknown + IP Routing Enabled. . . . . . . . : No + WINS Proxy Enabled. . . . . . . . : No + DNS Suffix Search List. . . . . . : example.com + +Ethernet adapter Local Area Connection 2: + + Connection-specific DNS Suffix . : example.com + Description . . . . . . . . . . . : Int Adapter (PILA8470B) + Physical Address. . . . . . . . . : %(mac_addr)s + Dhcp Enabled. . . . . . . . . . . : Yes + Autoconfiguration Enabled . . . . : Yes + IP Address. . . . . . . . . . . . : %(ip_addr)s + Subnet Mask . . . . . . . . . . . : 255.255.254.0 + Default Gateway . . . . . . . . . 
+   DHCP Server . . . . . . . . . . . : 172.1.3.241
+   DNS Servers . . . . . . . . . . . : 172.1.3.241
+                                       8.8.8.8
+                                       8.8.4.4
+   Lease Obtained. . . . . . . . . . : Thursday, April 07, 2011 9:14:55 AM
+   Lease Expires . . . . . . . . . . : Thursday, April 07, 2011 1:14:55 PM
+""" % { 'ip_addr': WINDOWS_XP_IP, 'mac_addr': WINDOWS_XP_MAC }
+
+
+# scutil show State:/Network/Global/IPv4
+OSX_IPV4_STATE = """
+<dictionary> {
+  PrimaryInterface : en1
+  PrimaryService : 8824452C-FED4-4C09-9256-40FB146739E0
+  Router : 192.168.1.1
+}
+"""
+
+# scutil show State:/Network/Service/[PRIMARY_SERVICE_KEY]/DNS
+OSX_DNS_STATE_LION = """
+<dictionary> {
+  DomainName : mtv.corp.google.com
+  SearchDomains : <array> {
+    0 : mtv.corp.google.com
+    1 : corp.google.com
+    2 : prod.google.com
+    3 : prodz.google.com
+    4 : google.com
+  }
+  ServerAddresses : <array> {
+    0 : 172.72.255.1
+    1 : 172.49.117.57
+    2 : 172.54.116.57
+  }
+}
+"""
+
+OSX_DNS_STATE_SNOW_LEOPARD = """
+<dictionary> {
+  ServerAddresses : <array> {
+    0 : 172.27.1.1
+    1 : 172.94.117.57
+    2 : 172.45.116.57
+  }
+  DomainName : mtv.corp.google.com
+  SearchDomains : <array> {
+    0 : mtv.corp.google.com
+    1 : corp.google.com
+    2 : prod.google.com
+    3 : prodz.google.com
+    4 : google.com
+  }
+}
+"""
+
+
+class Win7Settings(platformsettings.WindowsPlatformSettings):
+  @classmethod
+  def _ipconfig(cls, *args):
+    if args == ('/all',):
+      return WINDOWS_7_IPCONFIG
+    raise RuntimeError
+
+class WinXpSettings(platformsettings.WindowsPlatformSettings):
+  @classmethod
+  def _ipconfig(cls, *args):
+    if args == ('/all',):
+      return WINDOWS_XP_IPCONFIG
+    raise RuntimeError
+
+
+class WindowsPlatformSettingsTest(unittest.TestCase):
+  def test_get_mac_address_xp(self):
+    self.assertEqual(WINDOWS_XP_MAC,
+                     WinXpSettings().get_mac_address(WINDOWS_XP_IP))
+
+  def test_get_mac_address_7(self):
+    self.assertEqual(WINDOWS_7_MAC,
+                     Win7Settings().get_mac_address(WINDOWS_7_IP))
+
+
+class OsxSettings(platformsettings.OsxPlatformSettings):
+  def __init__(self):
+    super(OsxSettings, self).__init__()
+    self.ipv4_state = OSX_IPV4_STATE
+    self.dns_state = None  # varies by test
+
+  def _scutil(self, cmd):
+    if cmd == 'show State:/Network/Global/IPv4':
+      return self.ipv4_state
+    elif cmd.startswith('show State:/Network/Service/'):
+      return self.dns_state
+    raise RuntimeError("Unrecognized cmd: %s" % cmd)
+
+
+class OsxPlatformSettingsTest(unittest.TestCase):
+  def setUp(self):
+    self.settings = OsxSettings()
+
+  def test_get_primary_dns_lion(self):
+    self.settings.dns_state = OSX_DNS_STATE_LION
+    self.assertEqual('172.72.255.1', self.settings.get_primary_dns())
+
+  def test_get_primary_dns_snow_leopard(self):
+    self.settings.dns_state = OSX_DNS_STATE_SNOW_LEOPARD
+    self.assertEqual('172.27.1.1', self.settings.get_primary_dns())
+
+  def test_get_primary_dns_unexpected_ipv4_state_raises(self):
+    self.settings.ipv4_state = 'Some error'
+    self.settings.dns_state = OSX_DNS_STATE_SNOW_LEOPARD
+    self.assertRaises(platformsettings.DnsReadError,
+                      self.settings.get_primary_dns)
+
+  def test_get_primary_dns_unexpected_dns_state_raises(self):
+    self.settings.dns_state = 'Some other error'
+    self.assertRaises(platformsettings.DnsReadError,
+                      self.settings.get_primary_dns)
+
+
+PING_OUTPUT = '''PING www.a.shifen.com (119.75.218.77) 56(84) bytes of data.
+ +--- www.a.shifen.com ping statistics --- +3 packets transmitted, 3 received, 0% packet loss, time 2204ms +rtt min/avg/max/mdev = 191.206/191.649/191.980/0.325 ms +''' +PING_AVG = 191.649 + +class PingSettings(platformsettings.PosixPlatformSettings): + def __init__(self): + super(PingSettings, self).__init__() + self.working_cmd = None + self.working_output = None + + def _check_output(self, *args): + if self.working_cmd and ' '.join(self.working_cmd) == ' '.join(args[:-1]): + return self.working_output + raise platformsettings.CalledProcessError(99, args) + +class PingTest(unittest.TestCase): + def setUp(self): + self.settings = PingSettings() + + def testNoWorkingPingReturnsZero(self): + self.assertEqual(0, self.settings.ping('www.noworking.com')) + + def testRegularPingCmdReturnsValue(self): + self.settings.working_cmd = self.settings.PING_CMD + self.settings.working_output = PING_OUTPUT + self.assertEqual(PING_AVG, self.settings.ping('www.regular.com')) + + def testRestrictedPingCmdReturnsValue(self): + self.settings.working_cmd = self.settings.PING_RESTRICTED_CMD + self.settings.working_output = PING_OUTPUT + self.assertEqual(PING_AVG, self.settings.ping('www.restricted.com')) + + def testNoWorkingPingConfiguresOnce(self): + self.settings.ping('www.first.com') + def AssertNotCalled(*args): + self.fail('Unexpected _check_output call.') + self.settings._check_output = AssertNotCalled + self.settings.ping('www.second.com') + +if __name__ == '__main__': + unittest.main() diff --git a/wpr/replay.py b/wpr/replay.py index 0c00ed5..63b7307 100755 --- a/wpr/replay.py +++ b/wpr/replay.py @@ -41,11 +41,11 @@ Network simulation examples: import logging import optparse -import socket +import os import sys -import time import traceback +import cachemissarchive import customhandlers import dnsproxy import httparchive @@ -53,111 +53,15 @@ import httpclient import httpproxy import platformsettings import replayspdyserver +import servermanager import trafficshaper - if sys.version < '2.6': print 'Need Python 2.6 or greater.' sys.exit(1) -def resolve_dns_to_remote_replay_server(platform_settings, dnsproxy_ip): - """Set the primary dns nameserver to the replay dnsproxy. - - Restore the original primary dns nameserver on exit. - - Args: - platform_settings: an instance of platformsettings.PlatformSettings - dnsproxy_ip: the ip address to use as the primary dns server. - """ - try: - platform_settings.set_primary_dns(dnsproxy_ip) - while True: - time.sleep(1) - except KeyboardInterrupt: - logging.info('Shutting down.') - finally: - platform_settings.restore_primary_dns() - - -def main(options, replay_filename): - exit_status = 0 - platform_settings = platformsettings.get_platform_settings() - if options.server: - resolve_dns_to_remote_replay_server(platform_settings, options.server) - return exit_status - host = platform_settings.get_server_ip_address(options.server_mode) - - web_server_class = httpproxy.HttpProxyServer - web_server_kwargs = { - 'host': host, - 'port': options.port, - } - if options.spdy: - assert not options.record, 'spdy cannot be used with --record.' 
- web_server_class = replayspdyserver.ReplaySpdyServer - web_server_kwargs['use_ssl'] = options.spdy != 'no-ssl' - web_server_kwargs['certfile'] = options.certfile - web_server_kwargs['keyfile'] = options.keyfile - - if options.record: - http_archive = httparchive.HttpArchive() - http_archive.AssertWritable(replay_filename) - else: - http_archive = httparchive.HttpArchive.Load(replay_filename) - logging.info('Loaded %d responses from %s', - len(http_archive), replay_filename) - - custom_handlers = customhandlers.CustomHandlers(options.screenshot_dir) - - real_dns_lookup = dnsproxy.RealDnsLookup() - if options.record: - http_archive_fetch = httpclient.RecordHttpArchiveFetch( - http_archive, real_dns_lookup, options.deterministic_script) - else: - http_archive_fetch = httpclient.ReplayHttpArchiveFetch( - http_archive, options.diff_unknown_requests) - - dns_passthrough_filter = None - if options.dns_private_passthrough: - skip_passthrough_hosts = set(request.host for request in http_archive) - dns_passthrough_filter = dnsproxy.DnsPrivatePassthroughFilter( - real_dns_lookup, skip_passthrough_hosts) - - dns_class = dnsproxy.DummyDnsServer - if options.dns_forwarding: - dns_class = dnsproxy.DnsProxyServer - - try: - with dns_class(options.dns_forwarding, dns_passthrough_filter, host): - with web_server_class(http_archive_fetch, custom_handlers, - **web_server_kwargs): - with trafficshaper.TrafficShaper( - host=host, - port=options.shaping_port, - up_bandwidth=options.up, - down_bandwidth=options.down, - delay_ms=options.delay_ms, - packet_loss_rate=options.packet_loss_rate, - init_cwnd=options.init_cwnd): - while True: - time.sleep(1) - except KeyboardInterrupt: - logging.info('Shutting down.') - except (dnsproxy.DnsProxyException, - trafficshaper.TrafficShaperException) as e: - logging.critical(e) - exit_status = 1 - except: - print traceback.format_exc() - exit_status = 2 - if options.record: - http_archive.Persist(replay_filename) - logging.info('Saved %d responses to %s', len(http_archive), replay_filename) - return exit_status - - -def configure_logging(log_level_name, log_file_name=None): +def configure_logging(platform_settings, log_level_name, log_file_name=None): """Configure logging level and format. 
Args: @@ -170,14 +74,225 @@ def configure_logging(log_level_name, log_file_name=None): log_level = getattr(logging, log_level_name.upper()) log_format = '%(asctime)s %(levelname)s %(message)s' logging.basicConfig(level=log_level, format=log_format) + logger = logging.getLogger() if log_file_name: fh = logging.FileHandler(log_file_name) fh.setLevel(log_level) fh.setFormatter(logging.Formatter(log_format)) - logging.getLogger().addHandler(fh) + logger.addHandler(fh) + system_handler = platform_settings.get_system_logging_handler() + if system_handler: + logger.addHandler(system_handler) -if __name__ == '__main__': +def AddDnsForward(server_manager, platform_settings, host): + """Forward DNS traffic.""" + server_manager.AppendStartStopFunctions( + [platform_settings.set_primary_dns, host], + [platform_settings.restore_primary_dns]) + +def AddDnsProxy(server_manager, options, host, real_dns_lookup, http_archive): + dns_lookup = None + if options.dns_private_passthrough: + dns_lookup = dnsproxy.PrivateIpDnsLookup( + host, real_dns_lookup, http_archive) + server_manager.AppendRecordCallback(dns_lookup.InitializeArchiveHosts) + server_manager.AppendReplayCallback(dns_lookup.InitializeArchiveHosts) + server_manager.Append(dnsproxy.DnsProxyServer, dns_lookup, host) + + +def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive, + cache_misses): + inject_script = httpclient.GetInjectScript(options.inject_scripts.split(',')) + http_custom_handlers = customhandlers.CustomHandlers(options.screenshot_dir) + if options.spdy: + assert not options.record, 'spdy cannot be used with --record.' + http_archive_fetch = httpclient.ReplayHttpArchiveFetch( + http_archive, + inject_script, + options.diff_unknown_requests, + cache_misses=cache_misses, + use_closest_match=options.use_closest_match) + server_manager.Append( + replayspdyserver.ReplaySpdyServer, http_archive_fetch, + http_custom_handlers, host=host, port=options.port, + certfile=options.certfile) + else: + http_custom_handlers.add_server_manager_handler(server_manager) + http_archive_fetch = httpclient.ControllableHttpArchiveFetch( + http_archive, real_dns_lookup, + inject_script, + options.diff_unknown_requests, options.record, + cache_misses=cache_misses, use_closest_match=options.use_closest_match) + server_manager.AppendRecordCallback(http_archive_fetch.SetRecordMode) + server_manager.AppendReplayCallback(http_archive_fetch.SetReplayMode) + server_manager.Append( + httpproxy.HttpProxyServer, http_archive_fetch, http_custom_handlers, + host=host, port=options.port, use_delays=options.use_server_delay) + if options.ssl: + server_manager.Append( + httpproxy.HttpsProxyServer, http_archive_fetch, + http_custom_handlers, options.certfile, + host=host, port=options.ssl_port, use_delays=options.use_server_delay) + + +def AddTrafficShaper(server_manager, options, host): + if options.HasTrafficShaping(): + server_manager.Append( + trafficshaper.TrafficShaper, host=host, port=options.shaping_port, + ssl_port=(options.ssl_shaping_port if options.ssl else None), + up_bandwidth=options.up, down_bandwidth=options.down, + delay_ms=options.delay_ms, packet_loss_rate=options.packet_loss_rate, + init_cwnd=options.init_cwnd, use_loopback=not options.server_mode) + + +class OptionsWrapper(object): + """Add checks, updates, and methods to option values. + + Example: + options, args = option_parser.parse_args() + options = OptionsWrapper(options, option_parser) # run checks and updates + if options.record and options.HasTrafficShaping(): + [...] 
+ """ + _TRAFFICSHAPING_OPTIONS = set( + ['down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net']) + _CONFLICTING_OPTIONS = ( + ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net', + 'spdy', 'use_server_delay')), + ('net', ('down', 'up', 'delay_ms')), + ('server', ('server_mode',)), + ) + # The --net values come from http://www.webpagetest.org/. + # https://sites.google.com/a/webpagetest.org/docs/other-resources/2011-fcc-broadband-data + _NET_CONFIGS = ( + # key --down --up --delay_ms + ('dsl', ('1536Kbit/s', '384Kbit/s', '50')), + ('cable', ( '5Mbit/s', '1Mbit/s', '28')), + ('fios', ( '20Mbit/s', '5Mbit/s', '4')), + ) + NET_CHOICES = [key for key, values in _NET_CONFIGS] + + def __init__(self, options, parser): + self._options = options + self._parser = parser + self._nondefaults = set([ + name for name, value in parser.defaults.items() + if getattr(options, name) != value]) + self._CheckConflicts() + self._MassageValues() + + def _CheckConflicts(self): + """Give an error if mutually exclusive options are used.""" + for option, bad_options in self._CONFLICTING_OPTIONS: + if option in self._nondefaults: + for bad_option in bad_options: + if bad_option in self._nondefaults: + self._parser.error('Option --%s cannot be used with --%s.' % + (bad_option, option)) + + def _MassageValues(self): + """Set options that depend on the values of other options.""" + for net_choice, values in self._NET_CONFIGS: + if net_choice == self.net: + self._options.down, self._options.up, self._options.delay_ms = values + if not self.shaping_port: + self._options.shaping_port = self.port + if not self.ssl_shaping_port: + self._options.ssl_shaping_port = self.ssl_port + if not self.ssl: + self._options.certfile = None + + def __getattr__(self, name): + """Make the original option values available.""" + return getattr(self._options, name) + + def HasTrafficShaping(self): + """Returns True iff the options require traffic shaping.""" + return bool(self._TRAFFICSHAPING_OPTIONS & self._nondefaults) + + def IsRootRequired(self): + """Returns True iff the options require root access.""" + return (self.HasTrafficShaping() or + self.dns_forwarding or + self.port < 1024 or + self.ssl_port < 1024) + + +def replay(options, replay_filename): + platform_settings = platformsettings.get_platform_settings() + if options.IsRootRequired(): + platform_settings.rerun_as_administrator() + configure_logging(platform_settings, options.log_level, options.log_file) + server_manager = servermanager.ServerManager(options.record) + cache_misses = None + if options.cache_miss_file: + if os.path.exists(options.cache_miss_file): + logging.warning('Cache Miss Archive file %s already exists; ' + 'replay will load and append entries to archive file', + options.cache_miss_file) + cache_misses = cachemissarchive.CacheMissArchive.Load( + options.cache_miss_file) + else: + cache_misses = cachemissarchive.CacheMissArchive( + options.cache_miss_file) + if options.server: + AddDnsForward(server_manager, platform_settings, options.server) + else: + host = platform_settings.get_server_ip_address(options.server_mode) + real_dns_lookup = dnsproxy.RealDnsLookup( + name_servers=[platform_settings.get_original_primary_dns()]) + if options.record: + http_archive = httparchive.HttpArchive() + http_archive.AssertWritable(replay_filename) + else: + http_archive = httparchive.HttpArchive.Load(replay_filename) + logging.info('Loaded %d responses from %s', + len(http_archive), replay_filename) + 
server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)
+    server_manager.AppendRecordCallback(http_archive.clear)
+
+    if options.dns_forwarding:
+      if not options.server_mode:
+        AddDnsForward(server_manager, platform_settings, host)
+      AddDnsProxy(server_manager, options, host, real_dns_lookup, http_archive)
+    if options.ssl and options.certfile is None:
+      options.certfile = platform_settings.get_certfile_name()
+      server_manager.AppendStartStopFunctions(
+          [platform_settings.create_certfile, options.certfile],
+          [os.unlink, options.certfile])
+    AddWebProxy(server_manager, options, host, real_dns_lookup,
+                http_archive, cache_misses)
+    AddTrafficShaper(server_manager, options, host)
+
+  exit_status = 0
+  try:
+    server_manager.Run()
+  except KeyboardInterrupt:
+    logging.info('Shutting down.')
+  except (dnsproxy.DnsProxyException,
+          trafficshaper.TrafficShaperException,
+          platformsettings.NotAdministratorError,
+          platformsettings.DnsUpdateError) as e:
+    logging.critical('%s: %s', e.__class__.__name__, e)
+    exit_status = 1
+  except:
+    logging.critical(traceback.format_exc())
+    exit_status = 2
+
+  if options.record:
+    http_archive.Persist(replay_filename)
+    logging.info('Saved %d responses to %s', len(http_archive), replay_filename)
+  if cache_misses:
+    cache_misses.Persist()
+    logging.info('Saved %d cache misses and %d requests to %s',
+                 cache_misses.get_total_cache_misses(),
+                 len(cache_misses.request_counts.keys()),
+                 options.cache_miss_file)
+  return exit_status
+
+
+def main():
   class PlainHelpFormatter(optparse.IndentedHelpFormatter):
     def format_description(self, description):
       if description:
@@ -190,10 +305,9 @@ if __name__ == '__main__':
       description=__doc__,
       epilog='http://code.google.com/p/web-page-replay/')

-  option_parser.add_option('-s', '--spdy', default=False,
-                           action='store',
-                           type='string',
-                           help='Use spdy to replay relay_file.  --spdy="no-ssl" uses SPDY without SSL.')
+  option_parser.add_option('--spdy', default=False,
+                           action='store_true',
+                           help='Replay via SPDY. (Can be combined with --no-ssl).')
   option_parser.add_option('-r', '--record', default=False,
                            action='store_true',
                            help='Download real responses and record them to replay_file')
@@ -206,6 +320,12 @@ if __name__ == '__main__':
                            action='store',
                            type='string',
                            help='Log file to use in addition to writing logs to stderr.')
+  option_parser.add_option('-e', '--cache_miss_file', default=None,
+                           action='store',
+                           dest='cache_miss_file',
+                           type='string',
+                           help='Archive file to record cache misses as pickled objects. '
+                                'Cache misses occur when a request cannot be served in replay mode.')

   network_group = optparse.OptionGroup(option_parser,
       'Network Simulation Options',
@@ -230,6 +350,12 @@ if __name__ == '__main__':
                            action='store',
                            type='string',
                            help='Set initial cwnd (linux only, requires kernel patch)')
+  network_group.add_option('--net', default=None,
+                           action='store',
+                           type='choice',
+                           choices=OptionsWrapper.NET_CHOICES,
+                           help='Select a set of network options: %s.' % ', '.join(
+                               OptionsWrapper.NET_CHOICES))
   option_parser.add_option_group(network_group)
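The --net presets are just bundles of the three shaping flags; a hedged sketch of the mapping that OptionsWrapper._MassageValues() applies (the dict literal here is illustrative, the real table is the _NET_CONFIGS tuple defined earlier in this patch):

    NET_CONFIGS = {
        #          --down        --up         --delay_ms
        'dsl':   ('1536Kbit/s', '384Kbit/s', '50'),
        'cable': ('5Mbit/s',    '1Mbit/s',   '28'),
        'fios':  ('20Mbit/s',   '5Mbit/s',   '4'),
    }

    down, up, delay_ms = NET_CONFIGS['cable']
    print down, up, delay_ms  # -> 5Mbit/s 1Mbit/s 28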

   harness_group = optparse.OptionGroup(option_parser,
@@ -246,17 +372,28 @@ if __name__ == '__main__':
                            'without changing the primary DNS nameserver. '
                            'Other hosts may connect to this using "replay.py --server" '
                            'or by pointing their DNS to this server.')
-  harness_group.add_option('-n', '--no-deterministic_script', default=True,
+  harness_group.add_option('-i', '--inject_scripts', default='deterministic.js',
+                           action='store',
+                           dest='inject_scripts',
+                           help='A comma separated list of JavaScript sources to inject in all '
+                                'pages. By default a script is injected that makes sources '
+                                'of entropy such as Date() and Math.random() deterministic. '
+                                'CAUTION: Without deterministic.js, many pages will not replay.')
+  harness_group.add_option('-D', '--no-diff_unknown_requests', default=True,
                            action='store_false',
-                           dest='deterministic_script',
-                           help='During a record, do not inject JavaScript to make sources of '
-                                'entropy such as Date() and Math.random() deterministic. CAUTION: '
-                                'With this option many web pages will not replay properly.')
-  harness_group.add_option('-D', '--diff_unknown_requests', default=False,
-                           action='store_true',
                            dest='diff_unknown_requests',
-                           help='During replay, show a unified diff of any unknown requests against '
+                           help='During replay, do not show a diff of unknown requests against '
                                 'their nearest match in the archive.')
+  harness_group.add_option('-C', '--use_closest_match', default=False,
+                           action='store_true',
+                           dest='use_closest_match',
+                           help='During replay, if a request is not found, serve the closest match '
+                                'in the archive instead of giving a 404.')
+  harness_group.add_option('-U', '--use_server_delay', default=False,
+                           action='store_true',
+                           dest='use_server_delay',
+                           help='During replay, simulate server delay by delaying response time to '
+                                'requests.')
   harness_group.add_option('-I', '--screenshot_dir', default=None,
                            action='store',
                            type='string',
@@ -270,33 +407,39 @@ if __name__ == '__main__':
   harness_group.add_option('-x', '--no-dns_forwarding', default=True,
                            action='store_false',
                            dest='dns_forwarding',
-                           help='Don\'t forward DNS requests to the local replay server.'
+                           help='Don\'t forward DNS requests to the local replay server. '
                                 'CAUTION: With this option an external mechanism must be used to '
                                 'forward traffic to the replay server.')
   harness_group.add_option('-o', '--port', default=80,
                            action='store',
                            type='int',
                            help='Port number to listen on.')
-  harness_group.add_option('--shaping_port', default=0,
+  harness_group.add_option('--ssl_port', default=443,
                            action='store',
                            type='int',
-                           help='Port to apply traffic shaping to. \'0\' means use the same '
-                                'port as the listen port (--port)')
-  harness_group.add_option('-c', '--certfile', default='',
+                           help='SSL port number to listen on.')
+  harness_group.add_option('--shaping_port', default=None,
                            action='store',
-                           dest='certfile',
-                           type='string',
-                           help='Certificate file for use with SSL')
-  harness_group.add_option('-k', '--keyfile', default='',
+                           type='int',
+                           help='Port on which to apply traffic shaping. Defaults to the '
+                                'listen port (--port)')
+  harness_group.add_option('--ssl_shaping_port', default=None,
+                           action='store',
+                           type='int',
+                           help='SSL port on which to apply traffic shaping. Defaults to the '
+                                'SSL listen port (--ssl_port)')
+  harness_group.add_option('-c', '--certfile', default=None,
                            action='store',
-                           dest='keyfile',
                            type='string',
-                           help='Key file for use with SSL')
+                           help='Certificate file to use with SSL (gets auto-generated if needed).')
+  harness_group.add_option('--no-ssl', default=True,
+                           action='store_false',
+                           dest='ssl',
+                           help='Do not set up an SSL proxy.')
   option_parser.add_option_group(harness_group)

   options, args = option_parser.parse_args()
-
-  configure_logging(options.log_level, options.log_file)
+  options = OptionsWrapper(options, option_parser)

   if options.server:
     replay_filename = None
@@ -305,23 +448,8 @@ if __name__ == '__main__':
   else:
     replay_filename = args[0]

-  if options.record:
-    if options.up != '0':
-      option_parser.error('Option --up cannot be used with --record.')
-    if options.down != '0':
-      option_parser.error('Option --down cannot be used with --record.')
-    if options.delay_ms != '0':
-      option_parser.error('Option --delay_ms cannot be used with --record.')
-    if options.packet_loss_rate != '0':
-      option_parser.error(
-          'Option --packet_loss_rate cannot be used with --record.')
-    if options.spdy:
-      option_parser.error('Option --spdy cannot be used with --record.')
+  return replay(options, replay_filename)

-  if options.server and options.server_mode:
-    option_parser.error('Cannot run with both --server and --server_mode')
-  if options.shaping_port == 0:
-    options.shaping_port = options.port
-
-  sys.exit(main(options, replay_filename))
+
+if __name__ == '__main__':
+  sys.exit(main())
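For orientation, a hedged sketch of how the reworked options combine on the command line (the archive name is illustrative; on POSIX, replay.py now re-execs itself under sudo when shaping, DNS forwarding, or privileged ports require root):

    $ ./replay.py --record archive.wpr       # record real responses
    $ ./replay.py --net cable archive.wpr    # replay with cable-like shaping
    $ ./replay.py --spdy archive.wpr         # replay over SPDY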
""" + dummy = http_common.dummy + def simple_responder(code, content): + res_hdrs = [('content-type', 'text/html'), ('version', 'HTTP/1.1')] + res_body, res_done = res_start(str(code), content, res_hdrs, dummy) + res_body(None) + res_done(None) + host = '' - for (name, value) in hdrs: + for name, value in hdrs: if name.lower() == 'host': host = value self.log.debug("request: %s, uri: %s, method: %s", host, uri, method) - dummy = http_common.dummy if method == 'GET': - request = httparchive.ArchivedHttpRequest(method, host, uri, None) + request = httparchive.ArchivedHttpRequest( + method, host, uri, None, dict(hdrs)) response_code = self.custom_handlers.handle(request) if response_code: - self.send_simple_response(response_code, "Handled by custom handlers") + simple_responder(response_code, "Handled by custom handlers") return dummy, dummy response = self.http_archive_fetch(request) if response: res_hdrs = [('version', 'HTTP/1.1')] - for (name, value) in response.headers: + for name, value in response.headers: name_lower = name.lower() - if name.lower() == CONTENT_LENGTH: + if name_lower == CONTENT_LENGTH: res_hdrs.append((name, str(value))) - elif name_lower == STATUS: - pass - elif name_lower == VERSION: + elif name_lower in (STATUS, VERSION): pass else: - res_hdrs.append((name, value)) - res_body, res_done = res_start(str(response.status), - response.reason, - res_hdrs, dummy) + res_hdrs.append((name_lower, value)) + res_body, res_done = res_start( + str(response.status), response.reason, res_hdrs, dummy) body = '' for item in response.response_data: res_body(item) res_done(None) else: self.log.error("404 returned: %s %s", method, uri) - self.send_simple_response(404, "file not found") + simple_responder(404, "file not found") else: # TODO(lzheng): Add support for other methods. self.log.error("method: %s is not supported: %s", method, uri) - self.send_simple_response(500, "Not supported") - + simple_responder(500, "Not supported") return dummy, dummy - def send_simple_response(self, code, phrase): - res_hdrs = [('Content-Type', 'text/html'), ('version', 'HTTP/1.1')] - res_body, res_done = res_start(str(code), phrase, res_hdrs, dummy) - res_body(None) - res_done(None) if __name__ == "__main__": logging.basicConfig() diff --git a/wpr/servermanager.py b/wpr/servermanager.py new file mode 100644 index 0000000..decaca3 --- /dev/null +++ b/wpr/servermanager.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# Copyright 2011 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Control "replay.py --server_mode" (e.g. switch from record to replay).""" + +import sys +import time + +class ServerManager(object): + """Run servers until is removed or an exception is raised. + + Servers start in the order they are appended and stop in the + opposite order. Servers are started by calling the initializer + passed to ServerManager.Append() and by calling __enter__(). 
+
+  def __init__(self, is_record_mode):
+    """Initialize a server manager."""
+    self.initializers = []
+    self.record_callbacks = []
+    self.replay_callbacks = []
+    self.is_record_mode = is_record_mode
+
+  def Append(self, initializer, *init_args, **init_kwargs):
+    """Append a server to the end of the list to run.
+
+    Servers start in the order they are appended and stop in the
+    opposite order.
+
+    Args:
+      initializer: a function that returns a server instance.
+          A server needs to implement the with-statement interface.
+      init_args: positional arguments for the initializer.
+      init_kwargs: keyword arguments for the initializer.
+    """
+    self.initializers.append((initializer, init_args, init_kwargs))
+
+  def AppendStartStopFunctions(self, start_spec, stop_spec):
+    """Append functions to call before and after the main run-loop.
+
+    If the start function succeeds, then the stop function will be
+    called when shutting down.
+
+    Args:
+      start_spec: (start_func, start_arg_1, start_arg_2, ...)
+          # The arguments are optional.
+      stop_spec: (stop_func, stop_arg_1, stop_arg_2, ...)
+          # The arguments are optional.
+    """
+    class Context(object):
+      def __enter__(self):
+        start_spec[0](*start_spec[1:])
+      def __exit__(self, type, value, traceback):
+        stop_spec[0](*stop_spec[1:])
+    self.Append(Context)
+
+  def AppendRecordCallback(self, func):
+    """Append a function to the list to call when switching to record mode.
+
+    Args:
+      func: a function that takes no arguments and returns no value.
+    """
+    self.record_callbacks.append(func)
+
+  def AppendReplayCallback(self, func):
+    """Append a function to the list to call when switching to replay mode.
+
+    Args:
+      func: a function that takes no arguments and returns no value.
+    """
+    self.replay_callbacks.append(func)
+
+  def IsRecordMode(self):
+    """Return True if the server manager is in record mode."""
+    return self.is_record_mode
+
+  def SetRecordMode(self):
+    """Call all the functions that have been registered to enter record mode."""
+    self.is_record_mode = True
+    for record_func in self.record_callbacks:
+      record_func()
+
+  def SetReplayMode(self):
+    """Call all the functions that have been registered to enter replay mode."""
+    self.is_record_mode = False
+    for replay_func in self.replay_callbacks:
+      replay_func()
+
+  def Run(self):
+    """Create the servers and loop.
+
+    The loop quits if a server raises an exception.
+ + Raises: + any exception raised by the servers + """ + server_exits = [] + exception_info = (None, None, None) + try: + for initializer, init_args, init_kwargs in self.initializers: + server = initializer(*init_args, **init_kwargs) + server_exits.insert(0, server.__exit__) + server.__enter__() + + while True: + time.sleep(1) + except: + exception_info = sys.exc_info() + finally: + for server_exit in server_exits: + try: + if server_exit(*exception_info): + exception_info = (None, None, None) + except: + exception_info = sys.exc_info() + if exception_info != (None, None, None): + raise exception_info[0], exception_info[1], exception_info[2] diff --git a/wpr/setup.cfg b/wpr/setup.cfg new file mode 100644 index 0000000..861a9f5 --- /dev/null +++ b/wpr/setup.cfg @@ -0,0 +1,5 @@ +[egg_info] +tag_build = +tag_date = 0 +tag_svn_revision = 0 + diff --git a/wpr/setup.py b/wpr/setup.py new file mode 100644 index 0000000..d6c5348 --- /dev/null +++ b/wpr/setup.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# Copyright 2012 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Creates a distributable python package. + +Creating new packages: + 1. Generate the package, dist/webpagereplay-X.X.tar.gz: + python setup.py sdist + 2. Upload the package file to the following: + http://code.google.com/p/web-page-replay/downloads/entry + +Installing packages: + $ easy_install http://web-page-replay.googlecode.com/files/webpagereplay-X.X.tar.gz + - The replay and httparchive commands are now on your PATH. +""" + +import setuptools + +setuptools.setup( + name='webpagereplay', + version='1.1.2', + description='Record and replay web content', + author='Web Page Replay Project Authors', + author_email='web-page-replay-dev@googlegroups.com', + url='http://code.google.com/p/web-page-replay/', + license='Apache License 2.0', + install_requires=['dnspython>=1.8'], + packages=[ + '', + 'perftracker', + 'third_party', + 'third_party.ipaddr', + 'third_party.nbhttp' + ], + package_dir={'': '.'}, + package_data={ + '': ['*.js', '*.txt', 'COPYING', 'LICENSE'], + }, + entry_points={ + 'console_scripts': [ + 'httparchive = httparchive:main', + 'replay = replay:main', + ] + }, + ) diff --git a/wpr/third_party/dns/README.web-page-replay b/wpr/third_party/dns/README.web-page-replay new file mode 100644 index 0000000..6d445fe --- /dev/null +++ b/wpr/third_party/dns/README.web-page-replay @@ -0,0 +1,12 @@ +Name: A DNS toolkit for Python +Short Name: dnspython +URL: http://www.dnspython.org/ +Version: 1.8.0 (found in ./version.py) +License: ISC +License File: LICENSE + +Description: +Used by Web Page Replay's dnsproxy module to create and handle dns queries. + +Local Modifications: +None. \ No newline at end of file diff --git a/wpr/third_party/ipaddr/README.web-page-replay b/wpr/third_party/ipaddr/README.web-page-replay new file mode 100644 index 0000000..4b42084 --- /dev/null +++ b/wpr/third_party/ipaddr/README.web-page-replay @@ -0,0 +1,12 @@ +Name: An IPv4/IPv6 manipulation library in Python. 
+Short Name: ipaddr-py
+URL: https://code.google.com/p/ipaddr-py/
+Version: 2.1.10 (ipaddr.__version__)
+License: Apache (v2.0)
+License File: COPYING
+
+Description:
+Used by Web Page Replay to check if an IP address is private.
+
+Local Modifications:
+Cherry picked revision 728996d6b1d4 to add license boilerplate to test-2to3.sh.
diff --git a/wpr/third_party/ipaddr/ipaddr.py b/wpr/third_party/ipaddr/ipaddr.py
index d7eb222..ad27ae9 100644
--- a/wpr/third_party/ipaddr/ipaddr.py
+++ b/wpr/third_party/ipaddr/ipaddr.py
@@ -22,7 +22,7 @@
 and networks.

 """

-__version__ = 'trunk'
+__version__ = '2.1.10'

 import struct

@@ -134,7 +134,7 @@ def v4_int_to_packed(address):
   """
   if address > _BaseV4._ALL_ONES:
     raise ValueError('Address too large for IPv4')
-  return struct.pack('!I', address)
+  return Bytes(struct.pack('!I', address))

 def v6_int_to_packed(address):
@@ -146,7 +146,7 @@ def v6_int_to_packed(address):
   Returns:
       The binary representation of this address.
   """
-  return struct.pack('!QQ', address >> 64, address & (2**64 - 1))
+  return Bytes(struct.pack('!QQ', address >> 64, address & (2**64 - 1)))

 def _find_address_range(addresses):
@@ -270,12 +270,12 @@ def _collapse_address_list_recursive(addresses):

     Example:

-        ip1 = IPv4Network'1.1.0.0/24')
-        ip2 = IPv4Network'1.1.1.0/24')
-        ip3 = IPv4Network'1.1.2.0/24')
-        ip4 = IPv4Network'1.1.3.0/24')
-        ip5 = IPv4Network'1.1.4.0/24')
-        ip6 = IPv4Network'1.1.0.1/22')
+        ip1 = IPv4Network('1.1.0.0/24')
+        ip2 = IPv4Network('1.1.1.0/24')
+        ip3 = IPv4Network('1.1.2.0/24')
+        ip4 = IPv4Network('1.1.3.0/24')
+        ip5 = IPv4Network('1.1.4.0/24')
+        ip6 = IPv4Network('1.1.0.1/22')

     _collapse_address_list_recursive([ip1, ip2, ip3, ip4, ip5, ip6]) ->
       [IPv4Network('1.1.0.0/22'), IPv4Network('1.1.4.0/24')]

@@ -368,15 +368,27 @@ def collapse_address_list(addresses):
 # backwards compatibility
 CollapseAddrList = collapse_address_list

-# Test whether this Python implementation supports byte objects that
-# are not identical to str ones.
-# We need to exclude platforms where bytes == str so that we can
-# distinguish between packed representations and strings, for example
-# b'12::' (the IPv4 address 49.50.58.58) and '12::' (an IPv6 address).
+# We need to distinguish between the string and packed-bytes representations
+# of an IP address. For example, b'0::1' is the IPv4 address 48.58.58.49,
+# while '0::1' is an IPv6 address.
+#
+# In Python 3, the native 'bytes' type already provides this functionality,
+# so we use it directly. For earlier implementations where bytes is not a
+# distinct type, we create a subclass of str to serve as a tag.
+#
+# Usage example (Python 2):
+#   ip = ipaddr.IPAddress(ipaddr.Bytes('xxxx'))
+#
+# Usage example (Python 3):
+#   ip = ipaddr.IPAddress(b'xxxx')
 try:
-    _compat_has_real_bytes = bytes is not str
-except NameError: # <Python2.6
-    _compat_has_real_bytes = False
+    if bytes is str:
+        raise TypeError("bytes is not a distinct type")
+    Bytes = bytes
+except (NameError, TypeError):
+    class Bytes(str):
+        def __repr__(self):
+            return 'Bytes(%s)' % str.__repr__(self)

+  def _parse_octet(self, octet_str):
+    """Convert a decimal octet into an integer.
+
+    Raises:
+      ValueError: if the octet isn't strictly a decimal from [0..255].
+    """
+    # Whitelist the characters, since int() allows a lot of bizarre stuff.
+    if not self._DECIMAL_DIGITS.issuperset(octet_str):
+      raise ValueError
+    octet_int = int(octet_str, 10)
+    # Disallow leading zeroes, because no clear standard exists on
+    # whether these should be interpreted as decimal or octal.
+    if octet_int > 255 or (octet_str[0] == '0' and len(octet_str) > 1):
+      raise ValueError
+    return octet_int
+
   def _string_from_ip_int(self, ip_int):
     """Turns a 32-bit integer into dotted decimal notation.
@@ -1059,37 +1089,6 @@ class _BaseV4(object):
       ip_int >>= 8
     return '.'.join(octets)

-  def _is_valid_ip(self, address):
-    """Validate the dotted decimal notation IP/netmask string.
-
-    Args:
-      address: A string, either representing a quad-dotted ip
-        or an integer which is a valid IPv4 IP address.
-
-    Returns:
-      A boolean, True if the string is a valid dotted decimal IP
-      string.
-
-    """
-    octets = address.split('.')
-    if len(octets) == 1:
-      # We have an integer rather than a dotted decimal IP.
- try: - return int(address) >= 0 and int(address) <= self._ALL_ONES - except ValueError: - return False - - if len(octets) != 4: - return False - - for octet in octets: - try: - if not 0 <= int(octet) <= 255: - return False - except ValueError: - return False - return True - @property def max_prefixlen(self): return self._max_prefixlen @@ -1190,7 +1189,6 @@ class IPv4Address(_BaseV4, _BaseIP): AddressValueError: If ipaddr isn't a valid IPv4 address. """ - _BaseIP.__init__(self, address) _BaseV4.__init__(self, address) # Efficient constructor from integer. @@ -1201,17 +1199,16 @@ class IPv4Address(_BaseV4, _BaseIP): return # Constructing from a packed address - if _compat_has_real_bytes: - if isinstance(address, bytes) and len(address) == 4: - self._ip = struct.unpack('!I', address)[0] - return + if isinstance(address, Bytes): + try: + self._ip, = struct.unpack('!I', address) + except struct.error: + raise AddressValueError(address) # Wrong length. + return # Assume input argument to be string or any object representation # which converts into a formatted IP string. addr_str = str(address) - if not self._is_valid_ip(addr_str): - raise AddressValueError(addr_str) - self._ip = self._ip_int_from_string(addr_str) @@ -1276,25 +1273,14 @@ class IPv4Network(_BaseV4, _BaseNet): _BaseNet.__init__(self, address) _BaseV4.__init__(self, address) - # Efficient constructor from integer. - if isinstance(address, (int, long)): - self._ip = address - self.ip = IPv4Address(self._ip) + # Constructing from an integer or packed bytes. + if isinstance(address, (int, long, Bytes)): + self.ip = IPv4Address(address) + self._ip = self.ip._ip self._prefixlen = self._max_prefixlen self.netmask = IPv4Address(self._ALL_ONES) - if address < 0 or address > self._ALL_ONES: - raise AddressValueError(address) return - # Constructing from a packed address - if _compat_has_real_bytes: - if isinstance(address, bytes) and len(address) == 4: - self._ip = struct.unpack('!I', address)[0] - self.ip = IPv4Address(self._ip) - self._prefixlen = self._max_prefixlen - self.netmask = IPv4Address(self._ALL_ONES) - return - # Assume input argument to be string or any object representation # which converts into a formatted IP prefix string. addr = str(address).split('/') @@ -1302,9 +1288,6 @@ class IPv4Network(_BaseV4, _BaseNet): if len(addr) > 2: raise AddressValueError(address) - if not self._is_valid_ip(addr[0]): - raise AddressValueError(addr[0]) - self._ip = self._ip_int_from_string(addr[0]) self.ip = IPv4Address(self._ip) @@ -1338,6 +1321,8 @@ class IPv4Network(_BaseV4, _BaseNet): if self.ip != self.network: raise ValueError('%s has host bits set' % self.ip) + if self._prefixlen == (self._max_prefixlen - 1): + self.iterhosts = self.__iter__ def _is_hostmask(self, ip_str): """Test if the IP string is a hostmask (rather than a netmask). @@ -1403,12 +1388,14 @@ class _BaseV6(object): """ _ALL_ONES = (2**IPV6LENGTH) - 1 + _HEXTET_COUNT = 8 + _HEX_DIGITS = frozenset('0123456789ABCDEFabcdef') def __init__(self, address): self._version = 6 self._max_prefixlen = IPV6LENGTH - def _ip_int_from_string(self, ip_str=None): + def _ip_int_from_string(self, ip_str): """Turn an IPv6 ip_str into an integer. Args: @@ -1418,35 +1405,95 @@ class _BaseV6(object): A long, the IPv6 ip_str. Raises: - AddressValueError: if ip_str isn't a valid IP Address. + AddressValueError: if ip_str isn't a valid IPv6 Address. """ - if not ip_str: - ip_str = str(self.ip) + parts = ip_str.split(':') - ip_int = 0 + # An IPv6 address needs at least 2 colons (3 parts). 
+ if len(parts) < 3: + raise AddressValueError(ip_str) - # Do we have an IPv4 mapped (::ffff:a.b.c.d) or compact (::a.b.c.d) - # ip_str? - fields = ip_str.split(':') - if fields[-1].count('.') == 3: - ipv4_string = fields.pop() - ipv4_int = IPv4Network(ipv4_string)._ip - octets = [] - for _ in xrange(2): - octets.append(hex(ipv4_int & 0xFFFF).lstrip('0x').rstrip('L')) - ipv4_int >>= 16 - fields.extend(reversed(octets)) - ip_str = ':'.join(fields) + # If the address has an IPv4-style suffix, convert it to hexadecimal. + if '.' in parts[-1]: + ipv4_int = IPv4Address(parts.pop())._ip + parts.append('%x' % ((ipv4_int >> 16) & 0xFFFF)) + parts.append('%x' % (ipv4_int & 0xFFFF)) - fields = self._explode_shorthand_ip_string(ip_str).split(':') - for field in fields: - try: - ip_int = (ip_int << 16) + int(field or '0', 16) - except ValueError: + # An IPv6 address can't have more than 8 colons (9 parts). + if len(parts) > self._HEXTET_COUNT + 1: + raise AddressValueError(ip_str) + + # Disregarding the endpoints, find '::' with nothing in between. + # This indicates that a run of zeroes has been skipped. + try: + skip_index, = ( + [i for i in xrange(1, len(parts) - 1) if not parts[i]] or + [None]) + except ValueError: + # Can't have more than one '::' + raise AddressValueError(ip_str) + + # parts_hi is the number of parts to copy from above/before the '::' + # parts_lo is the number of parts to copy from below/after the '::' + if skip_index is not None: + # If we found a '::', then check if it also covers the endpoints. + parts_hi = skip_index + parts_lo = len(parts) - skip_index - 1 + if not parts[0]: + parts_hi -= 1 + if parts_hi: + raise AddressValueError(ip_str) # ^: requires ^:: + if not parts[-1]: + parts_lo -= 1 + if parts_lo: + raise AddressValueError(ip_str) # :$ requires ::$ + parts_skipped = self._HEXTET_COUNT - (parts_hi + parts_lo) + if parts_skipped < 1: raise AddressValueError(ip_str) + else: + # Otherwise, allocate the entire address to parts_hi. The endpoints + # could still be empty, but _parse_hextet() will check for that. + if len(parts) != self._HEXTET_COUNT: + raise AddressValueError(ip_str) + parts_hi = len(parts) + parts_lo = 0 + parts_skipped = 0 - return ip_int + try: + # Now, parse the hextets into a 128-bit integer. + ip_int = 0L + for i in xrange(parts_hi): + ip_int <<= 16 + ip_int |= self._parse_hextet(parts[i]) + ip_int <<= 16 * parts_skipped + for i in xrange(-parts_lo, 0): + ip_int <<= 16 + ip_int |= self._parse_hextet(parts[i]) + return ip_int + except ValueError: + raise AddressValueError(ip_str) + + def _parse_hextet(self, hextet_str): + """Convert an IPv6 hextet string into an integer. + + Args: + hextet_str: A string, the number to parse. + + Returns: + The hextet as an integer. + + Raises: + ValueError: if the input isn't strictly a hex number from [0..FFFF]. + + """ + # Whitelist the characters, since int() allows a lot of bizarre stuff. + if not self._HEX_DIGITS.issuperset(hextet_str): + raise ValueError + hextet_int = int(hextet_str, 16) + if hextet_int > 0xFFFF: + raise ValueError + return hextet_int def _compress_hextets(self, hextets): """Compresses a list of hextets. @@ -1522,7 +1569,7 @@ class _BaseV6(object): hextets = self._compress_hextets(hextets) return ':'.join(hextets) - def _explode_shorthand_ip_string(self, ip_str=None): + def _explode_shorthand_ip_string(self): """Expand a shortened IPv6 address. Args: @@ -1532,108 +1579,20 @@ class _BaseV6(object): A string, the expanded IPv6 address. 
""" - if not ip_str: + if isinstance(self, _BaseNet): + ip_str = str(self.ip) + else: ip_str = str(self) - if isinstance(self, _BaseNet): - ip_str = str(self.ip) - if self._is_shorthand_ip(ip_str): - new_ip = [] - hextet = ip_str.split('::') - - if len(hextet) > 1: - sep = len(hextet[0].split(':')) + len(hextet[1].split(':')) - new_ip = hextet[0].split(':') - - for _ in xrange(8 - sep): - new_ip.append('0000') - new_ip += hextet[1].split(':') - - else: - new_ip = ip_str.split(':') - # Now need to make sure every hextet is 4 lower case characters. - # If a hextet is < 4 characters, we've got missing leading 0's. - ret_ip = [] - for hextet in new_ip: - ret_ip.append(('0' * (4 - len(hextet)) + hextet).lower()) - return ':'.join(ret_ip) - # We've already got a longhand ip_str. - return ip_str - - def _is_valid_ip(self, ip_str): - """Ensure we have a valid IPv6 address. - - Probably not as exhaustive as it should be. - - Args: - ip_str: A string, the IPv6 address. - - Returns: - A boolean, True if this is a valid IPv6 address. - - """ - # We need to have at least one ':'. - if ':' not in ip_str: - return False - - # We can only have one '::' shortener. - if ip_str.count('::') > 1: - return False - - # '::' should be encompassed by start, digits or end. - if ':::' in ip_str: - return False - - # A single colon can neither start nor end an address. - if ((ip_str.startswith(':') and not ip_str.startswith('::')) or - (ip_str.endswith(':') and not ip_str.endswith('::'))): - return False - - # If we have no concatenation, we need to have 8 fields with 7 ':'. - if '::' not in ip_str and ip_str.count(':') != 7: - # We might have an IPv4 mapped address. - if ip_str.count('.') != 3: - return False - - ip_str = self._explode_shorthand_ip_string(ip_str) - - # Now that we have that all squared away, let's check that each of the - # hextets are between 0x0 and 0xFFFF. - for hextet in ip_str.split(':'): - if hextet.count('.') == 3: - # If we have an IPv4 mapped address, the IPv4 portion has to - # be at the end of the IPv6 portion. - if not ip_str.split(':')[-1] == hextet: - return False - try: - IPv4Network(hextet) - except AddressValueError: - return False - else: - try: - # a value error here means that we got a bad hextet, - # something like 0xzzzz - if int(hextet, 16) < 0x0 or int(hextet, 16) > 0xFFFF: - return False - except ValueError: - return False - return True - - def _is_shorthand_ip(self, ip_str=None): - """Determine if the address is shortened. - - Args: - ip_str: A string, the IPv6 address. - - Returns: - A boolean, True if the address is shortened. - - """ - if ip_str.count('::') == 1: - return True - if filter(lambda x: len(x) < 4, ip_str.split(':')): - return True - return False + ip_int = self._ip_int_from_string(ip_str) + parts = [] + for i in xrange(self._HEXTET_COUNT): + parts.append('%04x' % (ip_int & 0xFFFF)) + ip_int >>= 16 + parts.reverse() + if isinstance(self, _BaseNet): + return '%s/%d' % (':'.join(parts), self.prefixlen) + return ':'.join(parts) @property def max_prefixlen(self): @@ -1749,13 +1708,9 @@ class _BaseV6(object): IPv4 mapped address. Return None otherwise. 
""" - hextets = self._explode_shorthand_ip_string().split(':') - if hextets[-3] != 'ffff': - return None - try: - return IPv4Address(int('%s%s' % (hextets[-2], hextets[-1]), 16)) - except AddressValueError: + if (self._ip >> 32) != 0xFFFF: return None + return IPv4Address(self._ip & 0xFFFFFFFF) @property def teredo(self): @@ -1764,14 +1719,13 @@ class _BaseV6(object): Returns: Tuple of the (server, client) IPs or None if the address doesn't appear to be a teredo address (doesn't start with - 2001) + 2001::/32) """ - bits = self._explode_shorthand_ip_string().split(':') - if not bits[0] == '2001': + if (self._ip >> 96) != 0x20010000: return None - return (IPv4Address(int(''.join(bits[2:4]), 16)), - IPv4Address(int(''.join(bits[6:]), 16) ^ 0xFFFFFFFF)) + return (IPv4Address((self._ip >> 64) & 0xFFFFFFFF), + IPv4Address(~self._ip & 0xFFFFFFFF)) @property def sixtofour(self): @@ -1782,10 +1736,9 @@ class _BaseV6(object): address doesn't appear to contain a 6to4 embedded address. """ - bits = self._explode_shorthand_ip_string().split(':') - if not bits[0] == '2002': + if (self._ip >> 112) != 0x2002: return None - return IPv4Address(int(''.join(bits[1:3]), 16)) + return IPv4Address((self._ip >> 80) & 0xFFFFFFFF) class IPv6Address(_BaseV6, _BaseIP): @@ -1810,7 +1763,6 @@ class IPv6Address(_BaseV6, _BaseIP): AddressValueError: If address isn't a valid IPv6 address. """ - _BaseIP.__init__(self, address) _BaseV6.__init__(self, address) # Efficient constructor from integer. @@ -1821,11 +1773,13 @@ class IPv6Address(_BaseV6, _BaseIP): return # Constructing from a packed address - if _compat_has_real_bytes: - if isinstance(address, bytes) and len(address) == 16: - tmp = struct.unpack('!QQ', address) - self._ip = (tmp[0] << 64) | tmp[1] - return + if isinstance(address, Bytes): + try: + hi, lo = struct.unpack('!QQ', address) + except struct.error: + raise AddressValueError(address) # Wrong length. + self._ip = (hi << 64) | lo + return # Assume input argument to be string or any object representation # which converts into a formatted IP string. @@ -1833,9 +1787,6 @@ class IPv6Address(_BaseV6, _BaseIP): if not addr_str: raise AddressValueError('') - if not self._is_valid_ip(addr_str): - raise AddressValueError(addr_str) - self._ip = self._ip_int_from_string(addr_str) @@ -1889,26 +1840,14 @@ class IPv6Network(_BaseV6, _BaseNet): _BaseNet.__init__(self, address) _BaseV6.__init__(self, address) - # Efficient constructor from integer. - if isinstance(address, (int, long)): - self._ip = address - self.ip = IPv6Address(self._ip) + # Constructing from an integer or packed bytes. + if isinstance(address, (int, long, Bytes)): + self.ip = IPv6Address(address) + self._ip = self.ip._ip self._prefixlen = self._max_prefixlen self.netmask = IPv6Address(self._ALL_ONES) - if address < 0 or address > self._ALL_ONES: - raise AddressValueError(address) return - # Constructing from a packed address - if _compat_has_real_bytes: - if isinstance(address, bytes) and len(address) == 16: - tmp = struct.unpack('!QQ', address) - self._ip = (tmp[0] << 64) | tmp[1] - self.ip = IPv6Address(self._ip) - self._prefixlen = self._max_prefixlen - self.netmask = IPv6Address(self._ALL_ONES) - return - # Assume input argument to be string or any object representation # which converts into a formatted IP prefix string. 
addr = str(address).split('/') @@ -1916,8 +1855,8 @@ class IPv6Network(_BaseV6, _BaseNet): if len(addr) > 2: raise AddressValueError(address) - if not self._is_valid_ip(addr[0]): - raise AddressValueError(addr[0]) + self._ip = self._ip_int_from_string(addr[0]) + self.ip = IPv6Address(self._ip) if len(addr) == 2: if self._is_valid_netmask(addr[1]): @@ -1929,13 +1868,12 @@ class IPv6Network(_BaseV6, _BaseNet): self.netmask = IPv6Address(self._ip_int_from_prefix(self._prefixlen)) - self._ip = self._ip_int_from_string(addr[0]) - self.ip = IPv6Address(self._ip) - if strict: if self.ip != self.network: raise ValueError('%s has host bits set' % self.ip) + if self._prefixlen == (self._max_prefixlen - 1): + self.iterhosts = self.__iter__ def _is_valid_netmask(self, prefixlen): """Verify that the netmask/prefixlen is valid. diff --git a/wpr/third_party/ipaddr/ipaddr_test.py b/wpr/third_party/ipaddr/ipaddr_test.py index 64bc2b4..9446889 100755 --- a/wpr/third_party/ipaddr/ipaddr_test.py +++ b/wpr/third_party/ipaddr/ipaddr_test.py @@ -23,10 +23,10 @@ import time import ipaddr # Compatibility function to cast str to bytes objects -if ipaddr._compat_has_real_bytes: - _cb = lambda bytestr: bytes(bytestr, 'charmap') +if issubclass(ipaddr.Bytes, str): + _cb = ipaddr.Bytes else: - _cb = str + _cb = lambda bytestr: bytes(bytestr, 'charmap') class IpaddrUnitTest(unittest.TestCase): @@ -68,25 +68,72 @@ class IpaddrUnitTest(unittest.TestCase): ipaddr.IPv6Address('::1')) def testInvalidStrings(self): - self.assertRaises(ValueError, ipaddr.IPNetwork, '') - self.assertRaises(ValueError, ipaddr.IPNetwork, 'www.google.com') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1.2.3') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1.2.3.4.5') - self.assertRaises(ValueError, ipaddr.IPNetwork, '301.2.2.2') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:6:7') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:6:7:') - self.assertRaises(ValueError, ipaddr.IPNetwork, ':2:3:4:5:6:7:8') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:6:7:8:9') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:6:7:8:') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1::3:4:5:6::8') - self.assertRaises(ValueError, ipaddr.IPNetwork, 'a:') - self.assertRaises(ValueError, ipaddr.IPNetwork, ':') - self.assertRaises(ValueError, ipaddr.IPNetwork, ':::') - self.assertRaises(ValueError, ipaddr.IPNetwork, '::a:') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1ffff::') - self.assertRaises(ValueError, ipaddr.IPNetwork, '0xa::') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:6:1a.2.3.4') - self.assertRaises(ValueError, ipaddr.IPNetwork, '1:2:3:4:5:1.2.3.4:8') + def AssertInvalidIP(ip_str): + self.assertRaises(ValueError, ipaddr.IPAddress, ip_str) + AssertInvalidIP("") + AssertInvalidIP("016.016.016.016") + AssertInvalidIP("016.016.016") + AssertInvalidIP("016.016") + AssertInvalidIP("016") + AssertInvalidIP("000.000.000.000") + AssertInvalidIP("000") + AssertInvalidIP("0x0a.0x0a.0x0a.0x0a") + AssertInvalidIP("0x0a.0x0a.0x0a") + AssertInvalidIP("0x0a.0x0a") + AssertInvalidIP("0x0a") + AssertInvalidIP("42.42.42.42.42") + AssertInvalidIP("42.42.42") + AssertInvalidIP("42.42") + AssertInvalidIP("42") + AssertInvalidIP("42..42.42") + AssertInvalidIP("42..42.42.42") + AssertInvalidIP("42.42.42.42.") + AssertInvalidIP("42.42.42.42...") + AssertInvalidIP(".42.42.42.42") + AssertInvalidIP("...42.42.42.42") + AssertInvalidIP("42.42.42.-0") + AssertInvalidIP("42.42.42.+0") + 
AssertInvalidIP(".") + AssertInvalidIP("...") + AssertInvalidIP("bogus") + AssertInvalidIP("bogus.com") + AssertInvalidIP("192.168.0.1.com") + AssertInvalidIP("12345.67899.-54321.-98765") + AssertInvalidIP("257.0.0.0") + AssertInvalidIP("42.42.42.-42") + AssertInvalidIP("3ffe::1.net") + AssertInvalidIP("3ffe::1::1") + AssertInvalidIP("1::2::3::4:5") + AssertInvalidIP("::7:6:5:4:3:2:") + AssertInvalidIP(":6:5:4:3:2:1::") + AssertInvalidIP("2001::db:::1") + AssertInvalidIP("FEDC:9878") + AssertInvalidIP("+1.+2.+3.4") + AssertInvalidIP("1.2.3.4e0") + AssertInvalidIP("::7:6:5:4:3:2:1:0") + AssertInvalidIP("7:6:5:4:3:2:1:0::") + AssertInvalidIP("9:8:7:6:5:4:3::2:1") + AssertInvalidIP("0:1:2:3::4:5:6:7") + AssertInvalidIP("3ffe:0:0:0:0:0:0:0:1") + AssertInvalidIP("3ffe::10000") + AssertInvalidIP("3ffe::goog") + AssertInvalidIP("3ffe::-0") + AssertInvalidIP("3ffe::+0") + AssertInvalidIP("3ffe::-1") + AssertInvalidIP(":") + AssertInvalidIP(":::") + AssertInvalidIP("::1.2.3") + AssertInvalidIP("::1.2.3.4.5") + AssertInvalidIP("::1.2.3.4:") + AssertInvalidIP("1.2.3.4::") + AssertInvalidIP("2001:db8::1:") + AssertInvalidIP(":2001:db8::1") + AssertInvalidIP(":1:2:3:4:5:6:7") + AssertInvalidIP("1:2:3:4:5:6:7:") + AssertInvalidIP(":1:2:3:4:5:6:") + AssertInvalidIP("192.0.2.1/32") + AssertInvalidIP("2001:db8::1/128") + self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network, '') self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network, 'google.com') @@ -188,26 +235,25 @@ class IpaddrUnitTest(unittest.TestCase): self.assertEqual(ipaddr.IPNetwork(self.ipv4.ip).version, 4) self.assertEqual(ipaddr.IPNetwork(self.ipv6.ip).version, 6) - if ipaddr._compat_has_real_bytes: # on python3+ - def testIpFromPacked(self): - ip = ipaddr.IPNetwork + def testIpFromPacked(self): + ip = ipaddr.IPNetwork - self.assertEqual(self.ipv4.ip, - ip(_cb('\x01\x02\x03\x04')).ip) - self.assertEqual(ip('255.254.253.252'), - ip(_cb('\xff\xfe\xfd\xfc'))) - self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 3)) - self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 5)) - self.assertEqual(self.ipv6.ip, - ip(_cb('\x20\x01\x06\x58\x02\x2a\xca\xfe' - '\x02\x00\x00\x00\x00\x00\x00\x01')).ip) - self.assertEqual(ip('ffff:2:3:4:ffff::'), - ip(_cb('\xff\xff\x00\x02\x00\x03\x00\x04' + - '\xff\xff' + '\x00' * 6))) - self.assertEqual(ip('::'), - ip(_cb('\x00' * 16))) - self.assertRaises(ValueError, ip, _cb('\x00' * 15)) - self.assertRaises(ValueError, ip, _cb('\x00' * 17)) + self.assertEqual(self.ipv4.ip, + ip(_cb('\x01\x02\x03\x04')).ip) + self.assertEqual(ip('255.254.253.252'), + ip(_cb('\xff\xfe\xfd\xfc'))) + self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 3)) + self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 5)) + self.assertEqual(self.ipv6.ip, + ip(_cb('\x20\x01\x06\x58\x02\x2a\xca\xfe' + '\x02\x00\x00\x00\x00\x00\x00\x01')).ip) + self.assertEqual(ip('ffff:2:3:4:ffff::'), + ip(_cb('\xff\xff\x00\x02\x00\x03\x00\x04' + + '\xff\xff' + '\x00' * 6))) + self.assertEqual(ip('::'), + ip(_cb('\x00' * 16))) + self.assertRaises(ValueError, ip, _cb('\x00' * 15)) + self.assertRaises(ValueError, ip, _cb('\x00' * 17)) def testGetIp(self): self.assertEqual(int(self.ipv4.ip), 16909060) @@ -287,6 +333,11 @@ class IpaddrUnitTest(unittest.TestCase): self.assertEqual(self.ipv4.subnet(), list(self.ipv4.iter_subnets())) self.assertEqual(self.ipv6.subnet(), list(self.ipv6.iter_subnets())) + def testIterHosts(self): + self.assertEqual([ipaddr.IPv4Address('2.0.0.0'), + ipaddr.IPv4Address('2.0.0.1')], + 
list(ipaddr.IPNetwork('2.0.0.0/31').iterhosts())) + def testFancySubnetting(self): self.assertEqual(sorted(self.ipv4.subnet(prefixlen_diff=3)), sorted(self.ipv4.subnet(new_prefix=27))) @@ -893,7 +944,7 @@ class IpaddrUnitTest(unittest.TestCase): '2001:0:0:4:0:0:0:8': '2001:0:0:4::8/128', '2001:0:0:4:5:6:7:8': '2001::4:5:6:7:8/128', '2001:0:3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128', - '2001:0::3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128', + '2001:0:3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128', '0:0:3:0:0:0:0:ffff': '0:0:3::ffff/128', '0:0:0:4:0:0:0:ffff': '::4:0:0:0:ffff/128', '0:0:0:0:5:0:0:ffff': '::5:0:0:ffff/128', @@ -903,6 +954,12 @@ class IpaddrUnitTest(unittest.TestCase): '0:0:0:0:0:0:0:1': '::1/128', '2001:0658:022a:cafe:0000:0000:0000:0000/66': '2001:658:22a:cafe::/66', + '::1.2.3.4': '::102:304/128', + '1:2:3:4:5:ffff:1.2.3.4': '1:2:3:4:5:ffff:102:304/128', + '::7:6:5:4:3:2:1': '0:7:6:5:4:3:2:1/128', + '::7:6:5:4:3:2:0': '0:7:6:5:4:3:2:0/128', + '7:6:5:4:3:2:1::': '7:6:5:4:3:2:1:0/128', + '0:6:5:4:3:2:1::': '0:6:5:4:3:2:1:0/128', } for uncompressed, compressed in test_addresses.items(): self.assertEqual(compressed, str(ipaddr.IPv6Network(uncompressed))) @@ -910,9 +967,9 @@ class IpaddrUnitTest(unittest.TestCase): def testExplodeShortHandIpStr(self): addr1 = ipaddr.IPv6Network('2001::1') addr2 = ipaddr.IPv6Address('2001:0:5ef5:79fd:0:59d:a0e5:ba1') - self.assertEqual('2001:0000:0000:0000:0000:0000:0000:0001', - addr1._explode_shorthand_ip_string(str(addr1.ip))) - self.assertEqual('0000:0000:0000:0000:0000:0000:0000:0001', + self.assertEqual('2001:0000:0000:0000:0000:0000:0000:0001/128', + addr1.exploded) + self.assertEqual('0000:0000:0000:0000:0000:0000:0000:0001/128', ipaddr.IPv6Network('::1/128').exploded) # issue 77 self.assertEqual('2001:0000:5ef5:79fd:0000:059d:a0e5:0ba1', @@ -957,7 +1014,7 @@ class IpaddrUnitTest(unittest.TestCase): self.assertEqual(ipaddr.IPNetwork('::/121').Supernet(), ipaddr.IPNetwork('::/120')) - self.assertEqual(ipaddr.IPNetwork('10.0.0.02').IsRFC1918(), True) + self.assertEqual(ipaddr.IPNetwork('10.0.0.2').IsRFC1918(), True) self.assertEqual(ipaddr.IPNetwork('10.0.0.0').IsMulticast(), False) self.assertEqual(ipaddr.IPNetwork('127.255.255.255').IsLoopback(), True) self.assertEqual(ipaddr.IPNetwork('169.255.255.255').IsLinkLocal(), @@ -1017,19 +1074,6 @@ class IpaddrUnitTest(unittest.TestCase): self.assertTrue(self.ipv6._cache.has_key('broadcast')) self.assertTrue(self.ipv6._cache.has_key('hostmask')) - def testIsValidIp(self): - ip = ipaddr.IPv6Address('::') - self.assertTrue(ip._is_valid_ip('2001:658:22a:cafe:200::1')) - self.assertTrue(ip._is_valid_ip('::ffff:10.10.0.0')) - self.assertTrue(ip._is_valid_ip('::ffff:192.168.0.0')) - self.assertFalse(ip._is_valid_ip('2001:658:22a::::1')) - self.assertFalse(ip._is_valid_ip(':658:22a:cafe:200::1')) - self.assertFalse(ip._is_valid_ip('2001:658:22a:cafe:200:')) - self.assertFalse(ip._is_valid_ip('2001:658:22a:cafe:200:127.0.0.1::1')) - self.assertFalse(ip._is_valid_ip('2001:658:22a:cafe:200::127.0.1')) - self.assertFalse(ip._is_valid_ip('2001:658:22a:zzzz:200::1')) - self.assertFalse(ip._is_valid_ip('2001:658:22a:cafe1:200::1')) - def testTeredo(self): # stolen from wikipedia server = ipaddr.IPv4Address('65.54.227.120') @@ -1039,6 +1083,8 @@ class IpaddrUnitTest(unittest.TestCase): ipaddr.IPAddress(teredo_addr).teredo) bad_addr = '2000::4136:e378:8000:63bf:3fff:fdd2' self.assertFalse(ipaddr.IPAddress(bad_addr).teredo) + bad_addr = '2001:0001:4136:e378:8000:63bf:3fff:fdd2' + self.assertFalse(ipaddr.IPAddress(bad_addr).teredo) 
# i77 teredo_addr = ipaddr.IPv6Address('2001:0:5ef5:79fd:0:59d:a0e5:ba1') diff --git a/wpr/third_party/ipaddr/test-2to3.sh b/wpr/third_party/ipaddr/test-2to3.sh index 408d665..5196083 100755 --- a/wpr/third_party/ipaddr/test-2to3.sh +++ b/wpr/third_party/ipaddr/test-2to3.sh @@ -1,5 +1,19 @@ #!/bin/sh - +# Copyright 2007 Google Inc. +# Licensed to PSF under a Contributor Agreement. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# # Converts the python2 ipaddr files to python3 and runs the unit tests # with both python versions. diff --git a/wpr/third_party/ipfw_win32/LICENSE b/wpr/third_party/ipfw_win32/LICENSE new file mode 100644 index 0000000..c1df6fe --- /dev/null +++ b/wpr/third_party/ipfw_win32/LICENSE @@ -0,0 +1,25 @@ +/*- + * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ diff --git a/wpr/third_party/ipfw_win32/README.web-page-replay b/wpr/third_party/ipfw_win32/README.web-page-replay new file mode 100644 index 0000000..8bf15c6 --- /dev/null +++ b/wpr/third_party/ipfw_win32/README.web-page-replay @@ -0,0 +1,12 @@ +Name: Windows XP NDIS module for Dummynet. +Short Name: ipfw3 +URL: http://info.iet.unipi.it/~luigi/dummynet/ +Version: 20100322 v.3.0.0.2 +License: BSD +License File: LICENSE + +Description: +Used by Web Page Replay to simulate network delays and bandwidth throttling on Windows XP. + +Local Modifications: +Dropped files: cyg-ipfw.exe, cygwin1.dll, testme.bat, wget.exe. 
\ No newline at end of file diff --git a/wpr/third_party/nbhttp/README.web-page-replay b/wpr/third_party/nbhttp/README.web-page-replay index b606e41..64d9eb0 100644 --- a/wpr/third_party/nbhttp/README.web-page-replay +++ b/wpr/third_party/nbhttp/README.web-page-replay @@ -1,5 +1,16 @@ -Source code home: https://github.com/mnot/nbhttp.git - commit 3f5d9b4f38c6579199cb - tree 47b3e9909bf633a098fb - parent 59b7793ef70f4fcf46ad -This directory contains files only from nbhttp/src directory. Please see each file header or LICENSE file (which is extracted from file headers) for license information. +Name: Tools for building non-blocking HTTP components +Short Name: nbhttp +URL: https://github.com/mnot/nbhttp/tree/spdy +Revision: commit 3f5d9b4f38c6579199cb + tree 47b3e9909bf633a098fb + parent 59b7793ef70f4fcf46ad +License: MIT/X11 (BSD like) +License File: LICENSE + +Description: +nbhttp is used to add support for spdy/2. + +Local Modifications: +Copied license from README to LICENSE. +Only included files from the nbhttp/src directory. +Moved license boilerplate to tops of files for Chrome license check. \ No newline at end of file diff --git a/wpr/third_party/nbhttp/__init__.py b/wpr/third_party/nbhttp/__init__.py index a909c83..ce1e048 100644 --- a/wpr/third_party/nbhttp/__init__.py +++ b/wpr/third_party/nbhttp/__init__.py @@ -4,6 +4,28 @@ Non-blocking HTTP components. """ +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + from client import Client from server import Server from push_tcp import run, stop, schedule diff --git a/wpr/third_party/nbhttp/client.py b/wpr/third_party/nbhttp/client.py index f91f0dc..e912362 100644 --- a/wpr/third_party/nbhttp/client.py +++ b/wpr/third_party/nbhttp/client.py @@ -1,5 +1,27 @@ #!/usr/bin/env python +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + """ Non-Blocking HTTP Client @@ -63,27 +85,6 @@ with the appropriate error dictionary. """ __author__ = "Mark Nottingham " -__copyright__ = """\ -Copyright (c) 2008-2009 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" from urlparse import urlsplit, urlunsplit diff --git a/wpr/third_party/nbhttp/push_tcp.py b/wpr/third_party/nbhttp/push_tcp.py index 43017da..547e6bc 100644 --- a/wpr/third_party/nbhttp/push_tcp.py +++ b/wpr/third_party/nbhttp/push_tcp.py @@ -2,6 +2,28 @@ import traceback +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + """ push-based asynchronous TCP @@ -122,27 +144,6 @@ To stop it, just stop it; """ __author__ = "Mark Nottingham " -__copyright__ = """\ -Copyright (c) 2008-2009 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" import sys import socket diff --git a/wpr/third_party/nbhttp/server.py b/wpr/third_party/nbhttp/server.py index 6f5a0be..7e43845 100755 --- a/wpr/third_party/nbhttp/server.py +++ b/wpr/third_party/nbhttp/server.py @@ -1,5 +1,27 @@ #!/usr/bin/env python +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + """ Non-Blocking HTTP Server @@ -63,27 +85,6 @@ indicated length are incorrect). """ __author__ = "Mark Nottingham " -__copyright__ = """\ -Copyright (c) 2008-2009 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" import os import sys diff --git a/wpr/third_party/nbhttp/spdy_client.py b/wpr/third_party/nbhttp/spdy_client.py index 4c9af41..58856ec 100644 --- a/wpr/third_party/nbhttp/spdy_client.py +++ b/wpr/third_party/nbhttp/spdy_client.py @@ -1,5 +1,27 @@ #!/usr/bin/env python +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + """ Non-Blocking SPDY Client @@ -65,27 +87,6 @@ with the appropriate error dictionary. # FIXME: update docs for API change (move res_start) __author__ = "Mark Nottingham " -__copyright__ = """\ -Copyright (c) 2008-2009 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" from urlparse import urlsplit diff --git a/wpr/third_party/nbhttp/spdy_common.py b/wpr/third_party/nbhttp/spdy_common.py index 09b3416..a978b79 100644 --- a/wpr/third_party/nbhttp/spdy_common.py +++ b/wpr/third_party/nbhttp/spdy_common.py @@ -33,10 +33,18 @@ THE SOFTWARE. import struct -import c_zlib +compressed_hdrs = True +try: + import c_zlib +except TypeError: + # c_zlib loads "libz". 
However, that fails on Windows. + compressed_hdrs = False + import sys + print >>sys.stderr, ( + 'WARNING: spdy_common: import c_zlib failed. Using uncompressed headers.') + from http_common import dummy -compressed_hdrs = True # There is a null character ('\0') at the end of the dictionary. The '\0' might # be removed in future spdy versions. dictionary = \ diff --git a/wpr/third_party/nbhttp/spdy_server.py b/wpr/third_party/nbhttp/spdy_server.py index 5fe084b..931645e 100755 --- a/wpr/third_party/nbhttp/spdy_server.py +++ b/wpr/third_party/nbhttp/spdy_server.py @@ -1,5 +1,27 @@ #!/usr/bin/env python +__copyright__ = """\ +Copyright (c) 2008-2009 Mark Nottingham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + """ Non-Blocking SPDY Server @@ -63,27 +85,6 @@ indicated length are incorrect). """ __author__ = "Mark Nottingham " -__copyright__ = """\ -Copyright (c) 2008-2009 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" import os import sys diff --git a/wpr/trafficshaper.py b/wpr/trafficshaper.py old mode 100755 new mode 100644 index 8ff74c2..9b6f894 --- a/wpr/trafficshaper.py +++ b/wpr/trafficshaper.py @@ -37,30 +37,37 @@ class BandwidthValueError(TrafficShaperException): class TrafficShaper(object): + """Manages network traffic shaping.""" - _UPLOAD_PIPE = '1' # Enforces overall upload bandwidth. - _UPLOAD_QUEUE = '2' # Shares upload bandwidth among source ports. - _DOWNLOAD_PIPE = '3' # Enforces overall download bandwidth. - _DOWNLOAD_QUEUE = '4' # Shares download bandwidth among destination ports.
+ # Pick webpagetest-compatible values (details: http://goo.gl/oghTg). + _UPLOAD_PIPE = '10' # Enforces overall upload bandwidth. + _UPLOAD_QUEUE = '10' # Shares upload bandwidth among source ports. + _UPLOAD_RULE = '5000' # Specifies when the upload queue is used. + _DOWNLOAD_PIPE = '11' # Enforces overall download bandwidth. + _DOWNLOAD_QUEUE = '11' # Shares download bandwidth among destination ports. + _DOWNLOAD_RULE = '5100' # Specifies when the download queue is used. + _QUEUE_SLOTS = 100 # Number of packets to queue. _BANDWIDTH_RE = re.compile(BANDWIDTH_PATTERN) - """Manages network traffic shaping.""" def __init__(self, dont_use=None, host='127.0.0.1', port='80', + ssl_port='443', dns_port='53', up_bandwidth='0', down_bandwidth='0', delay_ms='0', packet_loss_rate='0', - init_cwnd='0'): + init_cwnd='0', + use_loopback=True): """Start shaping traffic. Args: host: a host string (name or IP) for the web proxy. port: a port string (e.g. '80') for the web proxy. + ssl_port: a port string (e.g. '443') for the SSL web proxy. dns_port: a port string for the dns proxy (for unit testing). up_bandwidth: Upload bandwidth down_bandwidth: Download bandwidth @@ -68,43 +75,47 @@ class TrafficShaper(object): delay_ms: Propagation delay in milliseconds. '0' means no delay. packet_loss_rate: Packet loss rate in range [0..1]. '0' means no loss. init_cwnd: the initial cwnd setting. '0' means no change. + use_loopback: True iff shaping is done on the loopback (or equiv) adapter. """ assert dont_use is None # Force args to be named. self.platformsettings = platformsettings.get_platform_settings() self.host = host self.port = port + self.ssl_port = ssl_port self.dns_port = dns_port self.up_bandwidth = up_bandwidth self.down_bandwidth = down_bandwidth self.delay_ms = delay_ms self.packet_loss_rate = packet_loss_rate self.init_cwnd = init_cwnd + self.use_loopback = use_loopback if not self._BANDWIDTH_RE.match(self.up_bandwidth): raise BandwidthValueError(self.up_bandwidth) if not self._BANDWIDTH_RE.match(self.down_bandwidth): raise BandwidthValueError(self.down_bandwidth) - + self.is_shaping = False def __enter__(self): - self.platformsettings.configure_loopback() + if self.use_loopback: + self.platformsettings.configure_loopback() if self.init_cwnd != '0': - if self.platformsettings.is_cwnd_available(): - self.original_cwnd = self.platformsettings.get_cwnd() - self.platformsettings.set_cwnd(self.init_cwnd) - else: - logging.error('Platform does not support setting cwnd.') + self.platformsettings.set_cwnd(self.init_cwnd) try: - self.platformsettings.ipfw('-q', 'flush') + ipfw_list = self.platformsettings.ipfw('list') + if not ipfw_list.startswith('65535 '): + logging.warn('ipfw has existing rules:\n%s', ipfw_list) + self._delete_rules(ipfw_list) except: pass if (self.up_bandwidth == '0' and self.down_bandwidth == '0' and self.delay_ms == '0' and self.packet_loss_rate == '0'): + logging.info('Skipped shaping traffic.') return if not self.dns_port and not self.port: raise TrafficShaperException('No ports on which to shape traffic.') - ports = ','.join(str(p) for p in (self.port, self.dns_port) if p) - queue_size = self.platformsettings.get_ipfw_queue_slots() + ports = ','.join( + str(p) for p in (self.port, self.ssl_port, self.dns_port) if p) half_delay_ms = int(self.delay_ms) / 2 # split over up/down links try: @@ -120,18 +131,19 @@ class TrafficShaper(object): 'config', 'pipe', self._UPLOAD_PIPE, 'plr', self.packet_loss_rate, - 'queue', queue_size, + 'queue', self._QUEUE_SLOTS, 'mask', 'src-port', '0xffff', ) 
self.platformsettings.ipfw( - 'add', + 'add', self._UPLOAD_RULE, 'queue', self._UPLOAD_QUEUE, 'ip', 'from', 'any', 'to', self.host, - 'out', + self.use_loopback and 'out' or 'in', 'dst-port', ports, ) + self.is_shaping = True # Configure download shaping. self.platformsettings.ipfw( @@ -145,11 +157,11 @@ class TrafficShaper(object): 'config', 'pipe', self._DOWNLOAD_PIPE, 'plr', self.packet_loss_rate, - 'queue', queue_size, + 'queue', self._QUEUE_SLOTS, 'mask', 'dst-port', '0xffff', ) self.platformsettings.ipfw( - 'add', + 'add', self._DOWNLOAD_RULE, 'queue', self._DOWNLOAD_QUEUE, 'ip', 'from', self.host, @@ -162,12 +174,22 @@ class TrafficShaper(object): raise TrafficShaperException('Unable to shape traffic: %s' % e) def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb): - self.platformsettings.unconfigure_loopback() - if (self.init_cwnd != '0' and - self.platformsettings.is_cwnd_available()): - self.platformsettings.set_cwnd(self.original_cwnd) - try: - self.platformsettings.ipfw('-q', 'flush') - logging.info('Stopped shaping traffic') - except Exception, e: - raise TrafficShaperException('Unable to stop shaping traffic: %s' % e) + if self.use_loopback: + self.platformsettings.unconfigure_loopback() + self.platformsettings.restore_cwnd() + if self.is_shaping: + try: + self._delete_rules() + logging.info('Stopped shaping traffic') + except Exception, e: + raise TrafficShaperException('Unable to stop shaping traffic: %s' % e) + + def _delete_rules(self, ipfw_list=None): + if ipfw_list is None: + ipfw_list = self.platformsettings.ipfw('list') + existing_rules = set( + r.split()[0].lstrip('0') for r in ipfw_list.splitlines()) + delete_rules = [r for r in (self._DOWNLOAD_RULE, self._UPLOAD_RULE) + if r in existing_rules] + if delete_rules: + self.platformsettings.ipfw('delete', *delete_rules) diff --git a/wpr/trafficshaper_test.py b/wpr/trafficshaper_test.py index d7f4179..2c35393 100755 --- a/wpr/trafficshaper_test.py +++ b/wpr/trafficshaper_test.py @@ -25,24 +25,14 @@ import multiprocessing import platformsettings import socket import SocketServer -import sys -import time import trafficshaper import unittest +RESPONSE_SIZE_KEY = 'response-size:' TEST_DNS_PORT = 5555 TEST_HTTP_PORT = 8888 -RESPONSE_SIZE_KEY = 'response-size:' - - -# from timeit.py -if sys.platform == "win32": - # On Windows, the best timer is time.clock() - DEFAULT_TIMER = time.clock -else: - # On most other platforms the best timer is time.time() - DEFAULT_TIMER = time.time +TIMER = platformsettings.get_platform_settings().timer def GetElapsedMs(start_time, end_time): @@ -100,7 +90,7 @@ class TimedUdpServer(SocketServer.ThreadingUDPServer, # Override SocketServer.TcpServer setting to avoid intermittent errors. allow_reuse_address = True - def __init__(self, host, port, timer=DEFAULT_TIMER): + def __init__(self, host, port, timer=TIMER): SocketServer.ThreadingUDPServer.__init__( self, (host, port), TimedUdpHandler) self.timer = timer @@ -116,7 +106,7 @@ class TimedTcpServer(SocketServer.ThreadingTCPServer, # Override SocketServer.TcpServer setting to avoid intermittent errors. 
allow_reuse_address = True - def __init__(self, host, port, timer=DEFAULT_TIMER): + def __init__(self, host, port, timer=TIMER): SocketServer.ThreadingTCPServer.__init__( self, (host, port), TimedTcpHandler) self.timer = timer @@ -162,7 +152,7 @@ class TcpTrafficShaperTest(TimedTestCase): self.host = platform_settings.get_server_ip_address() self.port = TEST_HTTP_PORT self.tcp_socket_creator = TcpTestSocketCreator(self.host, self.port) - self.timer = DEFAULT_TIMER + self.timer = TIMER def TrafficShaper(self, **kwargs): return trafficshaper.TrafficShaper( @@ -236,7 +226,7 @@ class UdpTrafficShaperTest(TimedTestCase): platform_settings = platformsettings.get_platform_settings() self.host = platform_settings.get_server_ip_address() self.dns_port = TEST_DNS_PORT - self.timer = DEFAULT_TIMER + self.timer = TIMER def TrafficShaper(self, **kwargs): return trafficshaper.TrafficShaper( diff --git a/wpr/util.py b/wpr/util.py new file mode 100644 index 0000000..486d5c2 --- /dev/null +++ b/wpr/util.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# Copyright 2012 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Miscellaneous utility functions.""" + + +try: + # pkg_resources (part of setuptools) is needed when WPR is + # distributed as a package. (Resources may need to be extracted from + # the package.) + + import pkg_resources + + def resource_exists(resource_name): + return pkg_resources.resource_exists(__name__, resource_name) + + def resource_string(resource_name): + return pkg_resources.resource_string(__name__, resource_name) + +except ImportError: + # Import of pkg_resources failed, so fall back to getting resources + # from the file system. 
+ + import os + + def _resource_path(resource_name): + _replay_dir = os.path.dirname(os.path.abspath(__file__)) + return os.path.join(_replay_dir, resource_name) + + def resource_exists(resource_name): + return os.path.exists(_resource_path(resource_name)) + + def resource_string(resource_name): + return open(_resource_path(resource_name)).read() diff --git a/wpr/webpagereplay.egg-info/PKG-INFO b/wpr/webpagereplay.egg-info/PKG-INFO new file mode 100644 index 0000000..9863d0b --- /dev/null +++ b/wpr/webpagereplay.egg-info/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 1.0 +Name: webpagereplay +Version: 1.1.2 +Summary: Record and replay web content +Home-page: http://code.google.com/p/web-page-replay/ +Author: Web Page Replay Project Authors +Author-email: web-page-replay-dev@googlegroups.com +License: Apache License 2.0 +Description: UNKNOWN +Platform: UNKNOWN diff --git a/wpr/webpagereplay.egg-info/SOURCES.txt b/wpr/webpagereplay.egg-info/SOURCES.txt new file mode 100644 index 0000000..16c5f1e --- /dev/null +++ b/wpr/webpagereplay.egg-info/SOURCES.txt @@ -0,0 +1,209 @@ +.gitignore +COPYING +cachemissarchive.py +cachemissarchive_test.py +customhandlers.py +daemonserver.py +deterministic.js +dnsproxy.py +httparchive.py +httparchive_test.py +httpclient.py +httpproxy.py +httpzlib.py +mock-archive.txt +mockhttprequest.py +persistentmixin.py +platformsettings.py +platformsettings_test.py +replay.py +replayspdyserver.py +servermanager.py +setup.py +trafficshaper.py +trafficshaper_test.py +util.py +./cachemissarchive.py +./cachemissarchive_test.py +./customhandlers.py +./daemonserver.py +./dnsproxy.py +./httparchive.py +./httparchive_test.py +./httpclient.py +./httpproxy.py +./httpzlib.py +./mockhttprequest.py +./persistentmixin.py +./platformsettings.py +./platformsettings_test.py +./replay.py +./replayspdyserver.py +./servermanager.py +./trafficshaper.py +./trafficshaper_test.py +./util.py +./perftracker/__init__.py +./perftracker/runner.py +./perftracker/runner_cfg.py +./third_party/__init__.py +./third_party/ipaddr/ipaddr.py +./third_party/ipaddr/ipaddr_test.py +./third_party/ipaddr/setup.py +./third_party/nbhttp/__init__.py +./third_party/nbhttp/c_zlib.py +./third_party/nbhttp/client.py +./third_party/nbhttp/error.py +./third_party/nbhttp/http_common.py +./third_party/nbhttp/push_tcp.py +./third_party/nbhttp/server.py +./third_party/nbhttp/spdy_client.py +./third_party/nbhttp/spdy_common.py +./third_party/nbhttp/spdy_server.py +perftracker/README +perftracker/__init__.py +perftracker/runner.py +perftracker/runner_cfg.py +perftracker/app/app.yaml +perftracker/app/appengine_config.py +perftracker/app/index.yaml +perftracker/app/json.py +perftracker/app/main.py +perftracker/app/models.py +perftracker/app/suite.html +perftracker/app/jst/jsevalcontext.js +perftracker/app/jst/jstemplate.js +perftracker/app/jst/jstemplate_test.js +perftracker/app/jst/util.js +perftracker/app/scripts/util.js +perftracker/app/styles/style.css +perftracker/app/templates/compare_set.html +perftracker/app/templates/index.html +perftracker/app/templates/search.html +perftracker/app/templates/view_set.html +perftracker/app/templates/view_summary.html +perftracker/extension/background.html +perftracker/extension/manifest.json +perftracker/extension/script.js +perftracker/extension/server.js +perftracker/extension/start.js +third_party/__init__.py +third_party/dns/LICENSE +third_party/dns/README.web-page-replay +third_party/dns/__init__.py +third_party/dns/dnssec.py +third_party/dns/e164.py +third_party/dns/edns.py 
+third_party/dns/entropy.py +third_party/dns/exception.py +third_party/dns/flags.py +third_party/dns/inet.py +third_party/dns/ipv4.py +third_party/dns/ipv6.py +third_party/dns/message.py +third_party/dns/name.py +third_party/dns/namedict.py +third_party/dns/node.py +third_party/dns/opcode.py +third_party/dns/query.py +third_party/dns/rcode.py +third_party/dns/rdata.py +third_party/dns/rdataclass.py +third_party/dns/rdataset.py +third_party/dns/rdatatype.py +third_party/dns/renderer.py +third_party/dns/resolver.py +third_party/dns/reversename.py +third_party/dns/rrset.py +third_party/dns/set.py +third_party/dns/tokenizer.py +third_party/dns/tsig.py +third_party/dns/tsigkeyring.py +third_party/dns/ttl.py +third_party/dns/update.py +third_party/dns/version.py +third_party/dns/zone.py +third_party/dns/rdtypes/__init__.py +third_party/dns/rdtypes/dsbase.py +third_party/dns/rdtypes/keybase.py +third_party/dns/rdtypes/mxbase.py +third_party/dns/rdtypes/nsbase.py +third_party/dns/rdtypes/sigbase.py +third_party/dns/rdtypes/txtbase.py +third_party/dns/rdtypes/ANY/AFSDB.py +third_party/dns/rdtypes/ANY/CERT.py +third_party/dns/rdtypes/ANY/CNAME.py +third_party/dns/rdtypes/ANY/DLV.py +third_party/dns/rdtypes/ANY/DNAME.py +third_party/dns/rdtypes/ANY/DNSKEY.py +third_party/dns/rdtypes/ANY/DS.py +third_party/dns/rdtypes/ANY/GPOS.py +third_party/dns/rdtypes/ANY/HINFO.py +third_party/dns/rdtypes/ANY/HIP.py +third_party/dns/rdtypes/ANY/ISDN.py +third_party/dns/rdtypes/ANY/KEY.py +third_party/dns/rdtypes/ANY/LOC.py +third_party/dns/rdtypes/ANY/MX.py +third_party/dns/rdtypes/ANY/NS.py +third_party/dns/rdtypes/ANY/NSEC.py +third_party/dns/rdtypes/ANY/NSEC3.py +third_party/dns/rdtypes/ANY/NSEC3PARAM.py +third_party/dns/rdtypes/ANY/NXT.py +third_party/dns/rdtypes/ANY/PTR.py +third_party/dns/rdtypes/ANY/RP.py +third_party/dns/rdtypes/ANY/RRSIG.py +third_party/dns/rdtypes/ANY/RT.py +third_party/dns/rdtypes/ANY/SIG.py +third_party/dns/rdtypes/ANY/SOA.py +third_party/dns/rdtypes/ANY/SPF.py +third_party/dns/rdtypes/ANY/SSHFP.py +third_party/dns/rdtypes/ANY/TXT.py +third_party/dns/rdtypes/ANY/X25.py +third_party/dns/rdtypes/ANY/__init__.py +third_party/dns/rdtypes/IN/A.py +third_party/dns/rdtypes/IN/AAAA.py +third_party/dns/rdtypes/IN/APL.py +third_party/dns/rdtypes/IN/DHCID.py +third_party/dns/rdtypes/IN/IPSECKEY.py +third_party/dns/rdtypes/IN/KX.py +third_party/dns/rdtypes/IN/NAPTR.py +third_party/dns/rdtypes/IN/NSAP.py +third_party/dns/rdtypes/IN/NSAP_PTR.py +third_party/dns/rdtypes/IN/PX.py +third_party/dns/rdtypes/IN/SRV.py +third_party/dns/rdtypes/IN/WKS.py +third_party/dns/rdtypes/IN/__init__.py +third_party/ipaddr/COPYING +third_party/ipaddr/MANIFEST.in +third_party/ipaddr/OWNERS +third_party/ipaddr/README +third_party/ipaddr/README.web-page-replay +third_party/ipaddr/ipaddr.py +third_party/ipaddr/ipaddr_test.py +third_party/ipaddr/setup.py +third_party/ipaddr/test-2to3.sh +third_party/ipfw_win32/LICENSE +third_party/ipfw_win32/README.txt +third_party/ipfw_win32/README.web-page-replay +third_party/ipfw_win32/ipfw.exe +third_party/ipfw_win32/ipfw.sys +third_party/ipfw_win32/netipfw.inf +third_party/ipfw_win32/netipfw_m.inf +third_party/nbhttp/LICENSE +third_party/nbhttp/README.web-page-replay +third_party/nbhttp/__init__.py +third_party/nbhttp/c_zlib.py +third_party/nbhttp/client.py +third_party/nbhttp/error.py +third_party/nbhttp/http_common.py +third_party/nbhttp/push_tcp.py +third_party/nbhttp/server.py +third_party/nbhttp/spdy_client.py +third_party/nbhttp/spdy_common.py +third_party/nbhttp/spdy_server.py 
+webpagereplay.egg-info/PKG-INFO +webpagereplay.egg-info/SOURCES.txt +webpagereplay.egg-info/dependency_links.txt +webpagereplay.egg-info/entry_points.txt +webpagereplay.egg-info/requires.txt +webpagereplay.egg-info/top_level.txt \ No newline at end of file diff --git a/wpr/webpagereplay.egg-info/dependency_links.txt b/wpr/webpagereplay.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/wpr/webpagereplay.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/wpr/webpagereplay.egg-info/entry_points.txt b/wpr/webpagereplay.egg-info/entry_points.txt new file mode 100644 index 0000000..5ec8e3a --- /dev/null +++ b/wpr/webpagereplay.egg-info/entry_points.txt @@ -0,0 +1,4 @@ +[console_scripts] +httparchive = httparchive:main +replay = replay:main + diff --git a/wpr/webpagereplay.egg-info/requires.txt b/wpr/webpagereplay.egg-info/requires.txt new file mode 100644 index 0000000..2413754 --- /dev/null +++ b/wpr/webpagereplay.egg-info/requires.txt @@ -0,0 +1 @@ +dnspython>=1.8 \ No newline at end of file diff --git a/wpr/webpagereplay.egg-info/top_level.txt b/wpr/webpagereplay.egg-info/top_level.txt new file mode 100644 index 0000000..d956a0e --- /dev/null +++ b/wpr/webpagereplay.egg-info/top_level.txt @@ -0,0 +1,3 @@ + +third_party +perftracker
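
A quick way to spot-check the updated ipaddr without running its full test suite is a snippet along these lines (a sanity-check sketch, not part of the patch; it assumes the patched wpr/third_party/ipaddr is first on sys.path, and each assertion mirrors one of the new unit tests above):

import ipaddr

# IPv4-style suffixes are now parsed through IPv4Address.
assert int(ipaddr.IPv6Address('::1.2.3.4')) == 0x01020304

# .exploded on a network keeps the prefix length.
assert (ipaddr.IPv6Network('::1/128').exploded ==
        '0000:0000:0000:0000:0000:0000:0000:0001/128')

# A /31 (or v6 /127) network now iterates both of its hosts.
assert list(ipaddr.IPNetwork('2.0.0.0/31').iterhosts()) == [
    ipaddr.IPv4Address('2.0.0.0'), ipaddr.IPv4Address('2.0.0.1')]

# teredo matches only 2001::/32, not every address starting with '2001'.
assert ipaddr.IPAddress('2001:1::1').teredo is None

# Malformed input raises ValueError instead of half-parsing.
try:
    ipaddr.IPAddress('3ffe::1::1')
    raise AssertionError('expected ValueError')
except ValueError:
    pass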
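The fixed rule numbers in trafficshaper.py (5000 for upload, 5100 for download) are what make the new teardown safe: __exit__ no longer runs 'ipfw -q flush', which clobbered any rules the user already had; it deletes only the rules WPR itself added. Roughly, _delete_rules() matches rule numbers in 'ipfw list' output like this (the sample output below is illustrative only; the real call goes through platformsettings.ipfw):

ipfw_list = '''\
00100 allow ip from any to any via lo0
05000 queue 10 ip from any to 127.0.0.1 out dst-port 80,443,53
05100 queue 11 ip from 127.0.0.1 to any in dst-port 80,443,53
65535 allow ip from any to any'''

# Same expression as TrafficShaper._delete_rules():
existing_rules = set(
    r.split()[0].lstrip('0') for r in ipfw_list.splitlines())
# existing_rules == set(['100', '5000', '5100', '65535'])

delete_rules = [r for r in ('5100', '5000') if r in existing_rules]
# delete_rules == ['5100', '5000'] -- only WPR's own rules get removed,
# e.g. via platformsettings.ipfw('delete', *delete_rules).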