import boto3
import botocore
import json
import pandas as pd
import utils.load_data_util
# Raise pandas' display limits so a dataframe is rendered in a single view
# (more columns, no wrapping of wide frames, more rows).
pd.set_option(
    'display.max_columns', 500,
    'display.expand_frame_repr', False,
    'display.max_rows', 50000,
)
# Module-level boto3 resource object for the 's3' service.
# NOTE(review): `s3` is not referenced in the visible part of this file —
# confirm whether a later cell still uses it.
s3 = boto3.resource('s3')
# Helper function to trim the json files into a proper json format
def process_string(data):
    """Replace the first and last characters of ``data`` with brackets.

    The first and last characters are dropped and what remains is returned
    between ``[`` and ``]``.

    Args:
        data: The text to convert; it must support slicing and string
            concatenation (i.e. a ``str``).

    Returns:
        The string ``"[" + data[1:-1] + "]"``. Inputs shorter than two
        characters have an empty interior and therefore yield ``"[]"``.
    """
    return "[" + data[1:-1] + "]"
# Helper function to count the occurrences of each value of a given key
def count_key(data, key, key_value_count):
    """Tally how often each value of ``key`` occurs across ``data``.

    The tally is accumulated in place into ``key_value_count``, so counts
    already present in that mapping are added to rather than reset.

    Args:
        data: Iterable of records, each indexable by ``key``.
        key: The key looked up in every record.
        key_value_count: Mutable mapping from value to count; updated in
            place.

    Returns:
        None. The result is delivered through ``key_value_count``.

    Raises:
        KeyError: If a dict record does not contain ``key`` (the lookup
            ``site[key]`` is not guarded).
    """
    for site in data:
        key_value = site[key]
        # .get(..., 0) starts a fresh counter for values seen the first time.
        key_value_count[key_value] = key_value_count.get(key_value, 0) + 1
# Load the data to analyse via the project helper.
# NOTE(review): the meaning of the argument 50 (sample size? file count?) is
# defined in utils.load_data_util and is not visible here — confirm.
result = utils.load_data_util.load_random_data(50)

# Dump every distinct value of the `arguments` column to a text file, one per
# line, UTF-8 encoded. `count` ends up holding the number of lines written.
unique_args = result.arguments.unique()
count = 0
with open("uniqueArgs.txt", "wb") as f:
    for arg in unique_args:
        count += 1
        # The file is opened in binary mode, so encode explicitly.
        f.write((str(arg) + "\n").encode("utf-8"))

# Per-symbol count of non-null entries in every other column; the bare
# expression on the last line displays the frame as the notebook cell output.
grouped_by_symbol = result.groupby(['symbol']).count()
grouped_by_symbol
|
arguments |
call_stack |
crawl_id |
file_number |
func_name |
in_iframe |
location |
operation |
script_col |
script_line |
script_loc_eval |
script_url |
time_stamp |
value |
symbol |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CanvasRenderingContext2D.fillRect |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
CanvasRenderingContext2D.fillStyle |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
CanvasRenderingContext2D.textBaseline |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
HTMLCanvasElement.getContext |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
HTMLCanvasElement.height |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
HTMLCanvasElement.style |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
HTMLCanvasElement.width |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
RTCPeerConnection.iceGatheringState |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
RTCPeerConnection.idpLoginUrl |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
RTCPeerConnection.localDescription |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
RTCPeerConnection.onicecandidate |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
RTCPeerConnection.onremovestream |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
RTCPeerConnection.peerIdentity |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
RTCPeerConnection.remoteDescription |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
RTCPeerConnection.signalingState |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
window.Storage.getItem |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
window.Storage.key |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
window.Storage.length |
0 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
window.Storage.removeItem |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
window.Storage.setItem |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
window.document.cookie |
0 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
window.localStorage |
0 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
window.name |
0 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
window.navigator.appCodeName |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
window.navigator.appName |
0 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
window.navigator.appVersion |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
window.navigator.cookieEnabled |
0 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
window.navigator.language |
0 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
window.navigator.mimeTypes[application/futuresplash].type |
0 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
window.navigator.mimeTypes[application/x-shockwave-flash].type |
0 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
window.navigator.onLine |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
window.navigator.platform |
0 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
window.navigator.plugins[Shockwave Flash].description |
0 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
window.navigator.plugins[Shockwave Flash].filename |
0 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
window.navigator.plugins[Shockwave Flash].length |
0 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
window.navigator.plugins[Shockwave Flash].name |
0 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
window.navigator.plugins[Shockwave Flash].version |
0 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
window.navigator.product |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
window.navigator.productSub |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
window.navigator.userAgent |
0 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
window.navigator.vendor |
0 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
window.navigator.vendorSub |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
window.screen.colorDepth |
0 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
window.screen.pixelDepth |
0 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
window.sessionStorage |
0 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
# Pairwise correlation between the columns of `result`, displayed as the
# notebook cell output.
# NOTE(review): pandas >= 2.0 no longer drops non-numeric columns silently
# here; corr(numeric_only=True) may be needed there — confirm the pandas
# version in use.
result.corr()
|
crawl_id |
file_number |
in_iframe |
crawl_id |
NaN |
NaN |
NaN |
file_number |
NaN |
1.000000 |
0.137485 |
in_iframe |
NaN |
0.137485 |
1.000000 |