Save javascript function arguments as json instead of separate rows

This should help reduce the javascript table size by about 33% and will
make it easier to filter rows by a single argument. We choose to save the
arguments as a JSON object instead of a stringified array to make slicing
easier, both in a pandas dataframe and with sqlite extensions like
`json1`.
This commit is contained in:
englehardt 2017-01-25 17:29:41 -05:00
Родитель 70bba2b350
Коммит 4f94f24ba1
6 изменённых файлов: 96 добавлений и 110 удалений

Просмотреть файл

@ -11,7 +11,6 @@ CREATE TABLE IF NOT EXISTS javascript(
symbol TEXT,
operation TEXT,
value TEXT,
parameter_index INTEGER,
parameter_value TEXT,
arguments TEXT,
time_stamp TEXT NOT NULL
);

Просмотреть файл

@ -5,46 +5,51 @@ var pageManager = require("./page-manager.js");
exports.run = function(crawlID, testing) {
// Set up tables
var createJavascriptTable = data.load("create_javascript_table.sql");
loggingDB.executeSQL(createJavascriptTable, false);
// Set up tables
var createJavascriptTable = data.load("create_javascript_table.sql");
loggingDB.executeSQL(createJavascriptTable, false);
// Inject content script to instrument JavaScript API
pageMod.PageMod({
include: "*",
contentScriptWhen: "start",
contentScriptFile: data.url("./content.js"),
contentScriptOptions: {
'testing': testing
},
onAttach: function onAttach(worker) {
var url = worker.url;
function processCallsAndValues(data) {
var update = {};
update["crawl_id"] = crawlID;
update["script_url"] = loggingDB.escapeString(data.scriptUrl);
update["script_line"] = loggingDB.escapeString(data.scriptLine);
update["script_col"] = loggingDB.escapeString(data.scriptCol);
update["func_name"] = loggingDB.escapeString(data.funcName);
update["script_loc_eval"] = loggingDB.escapeString(data.scriptLocEval);
update["call_stack"] = loggingDB.escapeString(data.callStack);
update["symbol"] = loggingDB.escapeString(data.symbol);
update["operation"] = loggingDB.escapeString(data.operation);
update["value"] = loggingDB.escapeString(data.value);
update["time_stamp"] = data.timeStamp;
// Inject content script to instrument JavaScript API
pageMod.PageMod({
include: "*",
contentScriptWhen: "start",
contentScriptFile: data.url("./content.js"),
contentScriptOptions: {
'testing': testing
},
onAttach: function onAttach(worker) {
var url = worker.url;
function processCallsAndValues(data) {
var update = {};
update["crawl_id"] = crawlID;
update["script_url"] = loggingDB.escapeString(data.scriptUrl);
update["script_line"] = loggingDB.escapeString(data.scriptLine);
update["script_col"] = loggingDB.escapeString(data.scriptCol);
update["func_name"] = loggingDB.escapeString(data.funcName);
update["script_loc_eval"] = loggingDB.escapeString(data.scriptLocEval);
update["call_stack"] = loggingDB.escapeString(data.callStack);
update["symbol"] = loggingDB.escapeString(data.symbol);
update["operation"] = loggingDB.escapeString(data.operation);
update["value"] = loggingDB.escapeString(data.value);
update["time_stamp"] = data.timeStamp;
if (data.operation == 'call' && data.args.length > 0) {
for(var i = 0; i < data.args.length; i++) {
update["parameter_index"] = i;
update["parameter_value"] = loggingDB.escapeString(data.args[i]);
loggingDB.executeSQL(loggingDB.createInsert("javascript", update), true);
}
} else {
loggingDB.executeSQL(loggingDB.createInsert("javascript", update), true);
}
}
worker.port.on("logCall", function(data){processCallsAndValues(data)});
worker.port.on("logValue", function(data){processCallsAndValues(data)});
// Create a JSON object for function arguments.
// We create an object that maps array position to argument,
// e.g. someFunc('a',123,'b') --> {0: 'a', 1: 123, 2: 'b'},
// to make it easier to query the data, using something like the
// sqlite3 json1 extension.
var args = {};
if (data.operation == 'call' && data.args.length > 0) {
for(var i = 0; i < data.args.length; i++) {
args[i] = data.args[i]
}
update["arguments"] = loggingDB.escapeString(JSON.stringify(args));
}
});
loggingDB.executeSQL(loggingDB.createInsert("javascript", update), true);
}
worker.port.on("logCall", function(data){processCallsAndValues(data)});
worker.port.on("logValue", function(data){processCallsAndValues(data)});
}
});
};

Двоичные данные
automation/Extension/firefox/openwpm.xpi

Двоичный файл не отображается.

Просмотреть файл

@ -27,8 +27,8 @@ def get_javascript_content(data_directory):
"""
db_path = os.path.join(data_directory, 'javascript.ldb')
db = plyvel.DB(db_path,
create_if_missing = False,
compression = 'snappy')
create_if_missing=False,
compression='snappy')
for content_hash, content in db.iterator():
yield content_hash, content
db.close()
@ -38,8 +38,7 @@ def get_javascript_entries(db, all_columns=False):
if all_columns:
select_columns = "*"
else:
select_columns = "script_url, symbol, operation, value, parameter_index,\
parameter_value"
select_columns = "script_url, symbol, operation, value, arguments"
return query_db(db, "SELECT %s FROM javascript" % select_columns)

Просмотреть файл

@ -33,57 +33,41 @@ PROPERTIES = {
CANVAS_TEST_URL = u"%s/canvas_fingerprinting.html" % utilities.BASE_TEST_URL
CANVAS_CALLS = {
(CANVAS_TEST_URL,
u"HTMLCanvasElement.getContext", u"call", u"", 0, u"2d"),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.textBaseline",
u"set", u"top", None, None),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.font", u"set",
u"14px 'Arial'", None, None),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.textBaseline",
u"set", u"alphabetic", None, None),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillStyle",
u"set", u"#f60", None, None),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillRect",
u"call", u"", 0, u"125"),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillRect",
u"call", u"", 1, u"1"),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillRect",
u"call", u"", 2, u"62"),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillRect",
u"call", u"", 3, u"20"),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillStyle",
u"set", u"#069", None, None),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
u"call", u"", 0, u"BrowserLeaks,com <canvas> 1.0"),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
u"call", u"", 1, u"2"),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
u"call", u"", 2, u"15"),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillStyle",
u"set", u"rgba(102, 204, 0, 0.7)", None, None),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
u"call", u"", 0, u"BrowserLeaks,com <canvas> 1.0"),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
u"call", u"", 1, u"4"),
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
u"call", u"", 2, u"17"),
(CANVAS_TEST_URL, u"HTMLCanvasElement.toDataURL", u"call",
u"", None, None)
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillStyle',
'set', '#f60', None),
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.textBaseline', 'set',
'alphabetic', None),
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.textBaseline', 'set',
'top', None),
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.font', 'set',
"14px 'Arial'", None),
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillStyle', 'set',
'#069', None),
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillStyle', 'set',
'rgba(102, 204, 0, 0.7)', None),
(CANVAS_TEST_URL, 'HTMLCanvasElement.getContext', 'call',
'', '{"0":"2d"}'),
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillRect', 'call',
'', '{"0":125,"1":1,"2":62,"3":20}'),
(CANVAS_TEST_URL, 'HTMLCanvasElement.toDataURL', 'call',
'', None),
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillText', 'call',
'', '{"0":"BrowserLeaks,com <canvas> 1.0","1":4,"2":17}'),
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillText', 'call',
'', '{"0":"BrowserLeaks,com <canvas> 1.0","1":2,"2":15}')
}
WEBRTC_TEST_URL = u"%s/webrtc_localip.html" % utilities.BASE_TEST_URL
WEBRTC_CALLS = {
(WEBRTC_TEST_URL, u'RTCPeerConnection.createDataChannel',
u'call', u'', 0, u''),
(WEBRTC_TEST_URL, u'RTCPeerConnection.createDataChannel',
u'call', u'', 1, u'{"reliable":false}'),
(WEBRTC_TEST_URL, u'RTCPeerConnection.onicecandidate',
u'set', u'FUNCTION', None, None),
(WEBRTC_TEST_URL, u'RTCPeerConnection.createOffer',
u'call', u'', 0, u'FUNCTION'),
(WEBRTC_TEST_URL, u'RTCPeerConnection.createOffer',
u'call', u'', 1, u'FUNCTION'),
(WEBRTC_TEST_URL, 'RTCPeerConnection.createOffer', 'call',
'', '{"0":"FUNCTION","1":"FUNCTION"}'),
(WEBRTC_TEST_URL, 'RTCPeerConnection.createDataChannel', 'call',
'', '{"0":""}'),
(WEBRTC_TEST_URL, 'RTCPeerConnection.createDataChannel', 'call',
'', '{"0":"","1":"{\\"reliable\\":false}"}'),
(WEBRTC_TEST_URL, 'RTCPeerConnection.onicecandidate', 'set',
'FUNCTION', None)
}
# we expect these strings to be present in the WebRTC SDP
@ -254,8 +238,8 @@ class TestExtension(OpenWPMTest):
observed_rows = set()
for item in rows:
if (item[1] == "RTCPeerConnection.setLocalDescription" and
item[2] == 'call' and item[4] == 0):
sdp_offer = item[5]
item[2] == 'call'):
sdp_offer = item[4]
self.check_webrtc_sdp_offer(sdp_offer)
else:
observed_rows.add(item)
@ -290,7 +274,7 @@ class TestExtension(OpenWPMTest):
rows = db_utils.get_javascript_entries(db, all_columns=True)
assert len(rows) # make sure we have some JS events captured
for row in rows:
js_time = datetime.strptime(row[14], "%Y-%m-%dT%H:%M:%S.%fZ")
js_time = datetime.strptime(row[13], "%Y-%m-%dT%H:%M:%S.%fZ")
# compare UTC now and the timestamp recorded at the visit
assert (utc_now - js_time).seconds < MAX_TIMEDELTA
assert not db_utils.any_command_failed(db)

Просмотреть файл

@ -23,10 +23,9 @@ GETS_AND_SETS = {
}
METHOD_CALLS = {
("window.test.method1", "call", 0, "hello"),
("window.test.method1", "call", 1, "{\"world\":true}"),
("window.test.method1", "call", 0, "new argument"),
("window.test.prop1", "call", 0, "now accepting arugments")
('window.test.prop1', 'call', '{"0":"now accepting arugments"}'),
('window.test.method1', 'call', '{"0":"hello","1":"{\\"world\\":true}"}'),
('window.test.method1', 'call', '{"0":"new argument"}')
}
RECURSIVE_GETS_AND_SETS = {
@ -42,9 +41,9 @@ RECURSIVE_GETS_AND_SETS = {
}
RECURSIVE_METHOD_CALLS = {
("window.test2.nestedObj.method1", "call", 0, "arg-before"),
("window.test2.nestedObj.method1", "call", 0, "arg-after"),
("window.test2.nestedObj.doubleNested.method1", "call", 0, "blah")
('window.test2.nestedObj.method1', 'call', '{"0":"arg-before"}'),
('window.test2.nestedObj.method1', 'call', '{"0":"arg-after"}'),
('window.test2.nestedObj.doubleNested.method1', 'call', '{"0":"blah"}')
}
RECURSIVE_PROP_SET = {
@ -53,8 +52,8 @@ RECURSIVE_PROP_SET = {
}
SET_PREVENT_CALLS = {
(u'window.test3.method1', u'call', None, None),
('window.test3.obj1.method2', 'call', None, None)
(u'window.test3.method1', u'call', None),
('window.test3.obj1.method2', 'call', None)
}
SET_PREVENT_GETS_AND_SETS = {
@ -86,26 +85,26 @@ class TestJSInstrument(OpenWPMTest):
# Check calls of non-recursive instrumentation
observed_gets_and_sets = set()
observed_calls = set()
for script_url, symbol, operation, value, pindex, pvalue in rows:
for script_url, symbol, operation, value, arguments in rows:
if not symbol.startswith('window.test.'):
continue
if operation == 'get' or operation == 'set':
observed_gets_and_sets.add((symbol, operation, value))
else:
observed_calls.add((symbol, operation, pindex, pvalue))
observed_calls.add((symbol, operation, arguments))
assert observed_calls == METHOD_CALLS
assert observed_gets_and_sets == GETS_AND_SETS
# Check calls of recursive instrumentation
observed_gets_and_sets = set()
observed_calls = set()
for script_url, symbol, operation, value, pindex, pvalue in rows:
for script_url, symbol, operation, value, arguments in rows:
if not symbol.startswith('window.test2.nestedObj'):
continue
if operation == 'get' or operation == 'set':
observed_gets_and_sets.add((symbol, operation, value))
else:
observed_calls.add((symbol, operation, pindex, pvalue))
observed_calls.add((symbol, operation, arguments))
assert observed_calls == RECURSIVE_METHOD_CALLS
assert observed_gets_and_sets == RECURSIVE_GETS_AND_SETS
@ -113,7 +112,7 @@ class TestJSInstrument(OpenWPMTest):
# We should only see the window.test2.l1.l2.l3.l4.l5.prop access
# and not window.test2.l1.l2.l3.l4.l5.l6.prop access.
prop_access = set()
for script_url, symbol, operation, value, pindex, pvalue in rows:
for script_url, symbol, operation, value, arguments in rows:
if not symbol.startswith('window.test2.l1'):
continue
prop_access.add((symbol, operation, value))
@ -122,11 +121,11 @@ class TestJSInstrument(OpenWPMTest):
# Check calls of object with sets prevented
observed_gets_and_sets = set()
observed_calls = set()
for script_url, symbol, operation, value, pindex, pvalue in rows:
for script_url, symbol, operation, value, arguments in rows:
if not symbol.startswith('window.test3'):
continue
if operation == 'call':
observed_calls.add((symbol, operation, pindex, pvalue))
observed_calls.add((symbol, operation, arguments))
else:
observed_gets_and_sets.add((symbol, operation, value))
assert observed_calls == SET_PREVENT_CALLS