зеркало из https://github.com/openwpm/OpenWPM.git
Save javascript function arguments as json instead of separate rows
This should help reduce the javascript table size by about 33% and will make it easier to filter rows by a single argument. We choose to save the arguments as a JSON object instead of a stringified array to make slicing easier, both in a pandas dataframe and with sqlite extensions like `json1`.
This commit is contained in:
Родитель
70bba2b350
Коммит
4f94f24ba1
|
@ -11,7 +11,6 @@ CREATE TABLE IF NOT EXISTS javascript(
|
|||
symbol TEXT,
|
||||
operation TEXT,
|
||||
value TEXT,
|
||||
parameter_index INTEGER,
|
||||
parameter_value TEXT,
|
||||
arguments TEXT,
|
||||
time_stamp TEXT NOT NULL
|
||||
);
|
||||
|
|
|
@ -5,46 +5,51 @@ var pageManager = require("./page-manager.js");
|
|||
|
||||
exports.run = function(crawlID, testing) {
|
||||
|
||||
// Set up tables
|
||||
var createJavascriptTable = data.load("create_javascript_table.sql");
|
||||
loggingDB.executeSQL(createJavascriptTable, false);
|
||||
// Set up tables
|
||||
var createJavascriptTable = data.load("create_javascript_table.sql");
|
||||
loggingDB.executeSQL(createJavascriptTable, false);
|
||||
|
||||
// Inject content script to instrument JavaScript API
|
||||
pageMod.PageMod({
|
||||
include: "*",
|
||||
contentScriptWhen: "start",
|
||||
contentScriptFile: data.url("./content.js"),
|
||||
contentScriptOptions: {
|
||||
'testing': testing
|
||||
},
|
||||
onAttach: function onAttach(worker) {
|
||||
var url = worker.url;
|
||||
function processCallsAndValues(data) {
|
||||
var update = {};
|
||||
update["crawl_id"] = crawlID;
|
||||
update["script_url"] = loggingDB.escapeString(data.scriptUrl);
|
||||
update["script_line"] = loggingDB.escapeString(data.scriptLine);
|
||||
update["script_col"] = loggingDB.escapeString(data.scriptCol);
|
||||
update["func_name"] = loggingDB.escapeString(data.funcName);
|
||||
update["script_loc_eval"] = loggingDB.escapeString(data.scriptLocEval);
|
||||
update["call_stack"] = loggingDB.escapeString(data.callStack);
|
||||
update["symbol"] = loggingDB.escapeString(data.symbol);
|
||||
update["operation"] = loggingDB.escapeString(data.operation);
|
||||
update["value"] = loggingDB.escapeString(data.value);
|
||||
update["time_stamp"] = data.timeStamp;
|
||||
// Inject content script to instrument JavaScript API
|
||||
pageMod.PageMod({
|
||||
include: "*",
|
||||
contentScriptWhen: "start",
|
||||
contentScriptFile: data.url("./content.js"),
|
||||
contentScriptOptions: {
|
||||
'testing': testing
|
||||
},
|
||||
onAttach: function onAttach(worker) {
|
||||
var url = worker.url;
|
||||
function processCallsAndValues(data) {
|
||||
var update = {};
|
||||
update["crawl_id"] = crawlID;
|
||||
update["script_url"] = loggingDB.escapeString(data.scriptUrl);
|
||||
update["script_line"] = loggingDB.escapeString(data.scriptLine);
|
||||
update["script_col"] = loggingDB.escapeString(data.scriptCol);
|
||||
update["func_name"] = loggingDB.escapeString(data.funcName);
|
||||
update["script_loc_eval"] = loggingDB.escapeString(data.scriptLocEval);
|
||||
update["call_stack"] = loggingDB.escapeString(data.callStack);
|
||||
update["symbol"] = loggingDB.escapeString(data.symbol);
|
||||
update["operation"] = loggingDB.escapeString(data.operation);
|
||||
update["value"] = loggingDB.escapeString(data.value);
|
||||
update["time_stamp"] = data.timeStamp;
|
||||
|
||||
if (data.operation == 'call' && data.args.length > 0) {
|
||||
for(var i = 0; i < data.args.length; i++) {
|
||||
update["parameter_index"] = i;
|
||||
update["parameter_value"] = loggingDB.escapeString(data.args[i]);
|
||||
loggingDB.executeSQL(loggingDB.createInsert("javascript", update), true);
|
||||
}
|
||||
} else {
|
||||
loggingDB.executeSQL(loggingDB.createInsert("javascript", update), true);
|
||||
}
|
||||
}
|
||||
worker.port.on("logCall", function(data){processCallsAndValues(data)});
|
||||
worker.port.on("logValue", function(data){processCallsAndValues(data)});
|
||||
// Create a json object for function arguments
|
||||
// We create an object that maps array position to argument
|
||||
// e.g. someFunc('a',123,'b') --> {0: 'a', 1: 123, 2: 'b'}
|
||||
// to make it easier to query the data, using something like the
|
||||
// sqlite3 json1 extension.
|
||||
var args = {};
|
||||
if (data.operation == 'call' && data.args.length > 0) {
|
||||
for(var i = 0; i < data.args.length; i++) {
|
||||
args[i] = data.args[i]
|
||||
}
|
||||
update["arguments"] = loggingDB.escapeString(JSON.stringify(args));
|
||||
}
|
||||
});
|
||||
|
||||
loggingDB.executeSQL(loggingDB.createInsert("javascript", update), true);
|
||||
}
|
||||
worker.port.on("logCall", function(data){processCallsAndValues(data)});
|
||||
worker.port.on("logValue", function(data){processCallsAndValues(data)});
|
||||
}
|
||||
});
|
||||
};
|
||||
|
|
Двоичные данные
automation/Extension/firefox/openwpm.xpi
Двоичные данные
automation/Extension/firefox/openwpm.xpi
Двоичный файл не отображается.
|
@ -27,8 +27,8 @@ def get_javascript_content(data_directory):
|
|||
"""
|
||||
db_path = os.path.join(data_directory, 'javascript.ldb')
|
||||
db = plyvel.DB(db_path,
|
||||
create_if_missing = False,
|
||||
compression = 'snappy')
|
||||
create_if_missing=False,
|
||||
compression='snappy')
|
||||
for content_hash, content in db.iterator():
|
||||
yield content_hash, content
|
||||
db.close()
|
||||
|
@ -38,8 +38,7 @@ def get_javascript_entries(db, all_columns=False):
|
|||
if all_columns:
|
||||
select_columns = "*"
|
||||
else:
|
||||
select_columns = "script_url, symbol, operation, value, parameter_index,\
|
||||
parameter_value"
|
||||
select_columns = "script_url, symbol, operation, value, arguments"
|
||||
|
||||
return query_db(db, "SELECT %s FROM javascript" % select_columns)
|
||||
|
||||
|
|
|
@ -33,57 +33,41 @@ PROPERTIES = {
|
|||
CANVAS_TEST_URL = u"%s/canvas_fingerprinting.html" % utilities.BASE_TEST_URL
|
||||
|
||||
CANVAS_CALLS = {
|
||||
(CANVAS_TEST_URL,
|
||||
u"HTMLCanvasElement.getContext", u"call", u"", 0, u"2d"),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.textBaseline",
|
||||
u"set", u"top", None, None),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.font", u"set",
|
||||
u"14px 'Arial'", None, None),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.textBaseline",
|
||||
u"set", u"alphabetic", None, None),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillStyle",
|
||||
u"set", u"#f60", None, None),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillRect",
|
||||
u"call", u"", 0, u"125"),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillRect",
|
||||
u"call", u"", 1, u"1"),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillRect",
|
||||
u"call", u"", 2, u"62"),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillRect",
|
||||
u"call", u"", 3, u"20"),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillStyle",
|
||||
u"set", u"#069", None, None),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
|
||||
u"call", u"", 0, u"BrowserLeaks,com <canvas> 1.0"),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
|
||||
u"call", u"", 1, u"2"),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
|
||||
u"call", u"", 2, u"15"),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillStyle",
|
||||
u"set", u"rgba(102, 204, 0, 0.7)", None, None),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
|
||||
u"call", u"", 0, u"BrowserLeaks,com <canvas> 1.0"),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
|
||||
u"call", u"", 1, u"4"),
|
||||
(CANVAS_TEST_URL, u"CanvasRenderingContext2D.fillText",
|
||||
u"call", u"", 2, u"17"),
|
||||
(CANVAS_TEST_URL, u"HTMLCanvasElement.toDataURL", u"call",
|
||||
u"", None, None)
|
||||
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillStyle',
|
||||
'set', '#f60', None),
|
||||
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.textBaseline', 'set',
|
||||
'alphabetic', None),
|
||||
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.textBaseline', 'set',
|
||||
'top', None),
|
||||
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.font', 'set',
|
||||
"14px 'Arial'", None),
|
||||
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillStyle', 'set',
|
||||
'#069', None),
|
||||
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillStyle', 'set',
|
||||
'rgba(102, 204, 0, 0.7)', None),
|
||||
(CANVAS_TEST_URL, 'HTMLCanvasElement.getContext', 'call',
|
||||
'', '{"0":"2d"}'),
|
||||
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillRect', 'call',
|
||||
'', '{"0":125,"1":1,"2":62,"3":20}'),
|
||||
(CANVAS_TEST_URL, 'HTMLCanvasElement.toDataURL', 'call',
|
||||
'', None),
|
||||
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillText', 'call',
|
||||
'', '{"0":"BrowserLeaks,com <canvas> 1.0","1":4,"2":17}'),
|
||||
(CANVAS_TEST_URL, 'CanvasRenderingContext2D.fillText', 'call',
|
||||
'', '{"0":"BrowserLeaks,com <canvas> 1.0","1":2,"2":15}')
|
||||
}
|
||||
|
||||
WEBRTC_TEST_URL = u"%s/webrtc_localip.html" % utilities.BASE_TEST_URL
|
||||
|
||||
WEBRTC_CALLS = {
|
||||
(WEBRTC_TEST_URL, u'RTCPeerConnection.createDataChannel',
|
||||
u'call', u'', 0, u''),
|
||||
(WEBRTC_TEST_URL, u'RTCPeerConnection.createDataChannel',
|
||||
u'call', u'', 1, u'{"reliable":false}'),
|
||||
(WEBRTC_TEST_URL, u'RTCPeerConnection.onicecandidate',
|
||||
u'set', u'FUNCTION', None, None),
|
||||
(WEBRTC_TEST_URL, u'RTCPeerConnection.createOffer',
|
||||
u'call', u'', 0, u'FUNCTION'),
|
||||
(WEBRTC_TEST_URL, u'RTCPeerConnection.createOffer',
|
||||
u'call', u'', 1, u'FUNCTION'),
|
||||
(WEBRTC_TEST_URL, 'RTCPeerConnection.createOffer', 'call',
|
||||
'', '{"0":"FUNCTION","1":"FUNCTION"}'),
|
||||
(WEBRTC_TEST_URL, 'RTCPeerConnection.createDataChannel', 'call',
|
||||
'', '{"0":""}'),
|
||||
(WEBRTC_TEST_URL, 'RTCPeerConnection.createDataChannel', 'call',
|
||||
'', '{"0":"","1":"{\\"reliable\\":false}"}'),
|
||||
(WEBRTC_TEST_URL, 'RTCPeerConnection.onicecandidate', 'set',
|
||||
'FUNCTION', None)
|
||||
}
|
||||
|
||||
# we expect these strings to be present in the WebRTC SDP
|
||||
|
@ -254,8 +238,8 @@ class TestExtension(OpenWPMTest):
|
|||
observed_rows = set()
|
||||
for item in rows:
|
||||
if (item[1] == "RTCPeerConnection.setLocalDescription" and
|
||||
item[2] == 'call' and item[4] == 0):
|
||||
sdp_offer = item[5]
|
||||
item[2] == 'call'):
|
||||
sdp_offer = item[4]
|
||||
self.check_webrtc_sdp_offer(sdp_offer)
|
||||
else:
|
||||
observed_rows.add(item)
|
||||
|
@ -290,7 +274,7 @@ class TestExtension(OpenWPMTest):
|
|||
rows = db_utils.get_javascript_entries(db, all_columns=True)
|
||||
assert len(rows) # make sure we have some JS events captured
|
||||
for row in rows:
|
||||
js_time = datetime.strptime(row[14], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
js_time = datetime.strptime(row[13], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
# compare UTC now and the timestamp recorded at the visit
|
||||
assert (utc_now - js_time).seconds < MAX_TIMEDELTA
|
||||
assert not db_utils.any_command_failed(db)
|
||||
|
|
|
@ -23,10 +23,9 @@ GETS_AND_SETS = {
|
|||
}
|
||||
|
||||
METHOD_CALLS = {
|
||||
("window.test.method1", "call", 0, "hello"),
|
||||
("window.test.method1", "call", 1, "{\"world\":true}"),
|
||||
("window.test.method1", "call", 0, "new argument"),
|
||||
("window.test.prop1", "call", 0, "now accepting arugments")
|
||||
('window.test.prop1', 'call', '{"0":"now accepting arugments"}'),
|
||||
('window.test.method1', 'call', '{"0":"hello","1":"{\\"world\\":true}"}'),
|
||||
('window.test.method1', 'call', '{"0":"new argument"}')
|
||||
}
|
||||
|
||||
RECURSIVE_GETS_AND_SETS = {
|
||||
|
@ -42,9 +41,9 @@ RECURSIVE_GETS_AND_SETS = {
|
|||
}
|
||||
|
||||
RECURSIVE_METHOD_CALLS = {
|
||||
("window.test2.nestedObj.method1", "call", 0, "arg-before"),
|
||||
("window.test2.nestedObj.method1", "call", 0, "arg-after"),
|
||||
("window.test2.nestedObj.doubleNested.method1", "call", 0, "blah")
|
||||
('window.test2.nestedObj.method1', 'call', '{"0":"arg-before"}'),
|
||||
('window.test2.nestedObj.method1', 'call', '{"0":"arg-after"}'),
|
||||
('window.test2.nestedObj.doubleNested.method1', 'call', '{"0":"blah"}')
|
||||
}
|
||||
|
||||
RECURSIVE_PROP_SET = {
|
||||
|
@ -53,8 +52,8 @@ RECURSIVE_PROP_SET = {
|
|||
}
|
||||
|
||||
SET_PREVENT_CALLS = {
|
||||
(u'window.test3.method1', u'call', None, None),
|
||||
('window.test3.obj1.method2', 'call', None, None)
|
||||
(u'window.test3.method1', u'call', None),
|
||||
('window.test3.obj1.method2', 'call', None)
|
||||
}
|
||||
|
||||
SET_PREVENT_GETS_AND_SETS = {
|
||||
|
@ -86,26 +85,26 @@ class TestJSInstrument(OpenWPMTest):
|
|||
# Check calls of non-recursive instrumentation
|
||||
observed_gets_and_sets = set()
|
||||
observed_calls = set()
|
||||
for script_url, symbol, operation, value, pindex, pvalue in rows:
|
||||
for script_url, symbol, operation, value, arguments in rows:
|
||||
if not symbol.startswith('window.test.'):
|
||||
continue
|
||||
if operation == 'get' or operation == 'set':
|
||||
observed_gets_and_sets.add((symbol, operation, value))
|
||||
else:
|
||||
observed_calls.add((symbol, operation, pindex, pvalue))
|
||||
observed_calls.add((symbol, operation, arguments))
|
||||
assert observed_calls == METHOD_CALLS
|
||||
assert observed_gets_and_sets == GETS_AND_SETS
|
||||
|
||||
# Check calls of recursive instrumentation
|
||||
observed_gets_and_sets = set()
|
||||
observed_calls = set()
|
||||
for script_url, symbol, operation, value, pindex, pvalue in rows:
|
||||
for script_url, symbol, operation, value, arguments in rows:
|
||||
if not symbol.startswith('window.test2.nestedObj'):
|
||||
continue
|
||||
if operation == 'get' or operation == 'set':
|
||||
observed_gets_and_sets.add((symbol, operation, value))
|
||||
else:
|
||||
observed_calls.add((symbol, operation, pindex, pvalue))
|
||||
observed_calls.add((symbol, operation, arguments))
|
||||
assert observed_calls == RECURSIVE_METHOD_CALLS
|
||||
assert observed_gets_and_sets == RECURSIVE_GETS_AND_SETS
|
||||
|
||||
|
@ -113,7 +112,7 @@ class TestJSInstrument(OpenWPMTest):
|
|||
# We should only see the window.test2.l1.l2.l3.l4.l5.prop access
|
||||
# and not window.test2.l1.l2.l3.l4.l5.l6.prop access.
|
||||
prop_access = set()
|
||||
for script_url, symbol, operation, value, pindex, pvalue in rows:
|
||||
for script_url, symbol, operation, value, arguments in rows:
|
||||
if not symbol.startswith('window.test2.l1'):
|
||||
continue
|
||||
prop_access.add((symbol, operation, value))
|
||||
|
@ -122,11 +121,11 @@ class TestJSInstrument(OpenWPMTest):
|
|||
# Check calls of object with sets prevented
|
||||
observed_gets_and_sets = set()
|
||||
observed_calls = set()
|
||||
for script_url, symbol, operation, value, pindex, pvalue in rows:
|
||||
for script_url, symbol, operation, value, arguments in rows:
|
||||
if not symbol.startswith('window.test3'):
|
||||
continue
|
||||
if operation == 'call':
|
||||
observed_calls.add((symbol, operation, pindex, pvalue))
|
||||
observed_calls.add((symbol, operation, arguments))
|
||||
else:
|
||||
observed_gets_and_sets.add((symbol, operation, value))
|
||||
assert observed_calls == SET_PREVENT_CALLS
|
||||
|
|
Загрузка…
Ссылка в новой задаче