UCOSP project fresh/historyless commit.
This commit is contained in:
Родитель
3cc44edbe6
Коммит
81e5cfba55
|
@ -0,0 +1,114 @@
|
|||
# Miscellaneous OS-generated files
|
||||
.DS_Store
|
||||
|
||||
# Large text files should be stored locally, not committed
|
||||
file_index.txt
|
||||
analyses/result.csv
|
||||
cache/
|
||||
/analyses/cache
|
||||
.ipynb_checkpoints/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other info into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# dotenv
|
||||
.env
|
||||
|
||||
# virtualenv
|
||||
.venv
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
# cache
|
||||
cache/
|
|
@ -0,0 +1,2 @@
|
|||
# sb2018
|
||||
Safe Browsing Project - Contributions from the UCOSP 2018 winter semester cohort
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,883 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import json\n",
|
||||
"import sys\n",
|
||||
"sys.path.append('..')\n",
|
||||
"from utils.load_data_util import load_random_data\n",
|
||||
"\n",
|
||||
"result = load_random_data(10, seed=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>key</th>\n",
|
||||
" <th>value</th>\n",
|
||||
" <th>script_url</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>RNLBSERVERID</td>\n",
|
||||
" <td>ded6726</td>\n",
|
||||
" <td>https://syndication.exosrv.com/splash.php?idzo...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>_ga</td>\n",
|
||||
" <td>GA1.2.692713596.1513387628</td>\n",
|
||||
" <td>https://syndication.exosrv.com/splash.php?idzo...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>_gid</td>\n",
|
||||
" <td>GA1.2.1540566351.1513387628</td>\n",
|
||||
" <td>https://syndication.exosrv.com/splash.php?idzo...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>_gat</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>https://syndication.exosrv.com/splash.php?idzo...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>splash_i</td>\n",
|
||||
" <td>false</td>\n",
|
||||
" <td>https://syndication.exosrv.com/splash.php?idzo...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>expires</td>\n",
|
||||
" <td>Sun, 16 Dec 2018 01:27:12 GMT</td>\n",
|
||||
" <td>https://syndication.exosrv.com/splash.php?idzo...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td>path</td>\n",
|
||||
" <td>/</td>\n",
|
||||
" <td>https://syndication.exosrv.com/splash.php?idzo...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>7</th>\n",
|
||||
" <td>Adshow</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8</th>\n",
|
||||
" <td>z_pro_city</td>\n",
|
||||
" <td>s_provice%3Dmixiegenzhou%26s_city%3Dnull</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>9</th>\n",
|
||||
" <td>userProvinceId</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10</th>\n",
|
||||
" <td>userCityId</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11</th>\n",
|
||||
" <td>userCountyId</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>12</th>\n",
|
||||
" <td>userLocationId</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>13</th>\n",
|
||||
" <td>ip_ck</td>\n",
|
||||
" <td>4c+H5PP1j7QuNjg2MzkyLjE1MTM0MTU0NzQ%3D</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>14</th>\n",
|
||||
" <td>lv</td>\n",
|
||||
" <td>1513415476</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>15</th>\n",
|
||||
" <td>vn</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>16</th>\n",
|
||||
" <td>Adshow</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>17</th>\n",
|
||||
" <td>z_pro_city</td>\n",
|
||||
" <td>s_provice%3Dmixiegenzhou%26s_city%3Dnull</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>18</th>\n",
|
||||
" <td>userProvinceId</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>19</th>\n",
|
||||
" <td>userCityId</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>20</th>\n",
|
||||
" <td>userCountyId</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>21</th>\n",
|
||||
" <td>userLocationId</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>22</th>\n",
|
||||
" <td>ip_ck</td>\n",
|
||||
" <td>4c+H5PP1j7QuNjg2MzkyLjE1MTM0MTU0NzQ%3D</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>23</th>\n",
|
||||
" <td>lv</td>\n",
|
||||
" <td>1513415476</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>24</th>\n",
|
||||
" <td>vn</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25</th>\n",
|
||||
" <td>POSMEDIAID</td>\n",
|
||||
" <td>c8de8cfb85858ad6c30636190806b8fc9b43af469b42ff...</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>26</th>\n",
|
||||
" <td>path</td>\n",
|
||||
" <td>/</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>27</th>\n",
|
||||
" <td>expires</td>\n",
|
||||
" <td>Thu, 30 Nov 2090 18:22:56 GMT</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>28</th>\n",
|
||||
" <td>domain</td>\n",
|
||||
" <td>.ydjs.zol.com.cn</td>\n",
|
||||
" <td>http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>29</th>\n",
|
||||
" <td>sd</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>https://content.adriver.ru/banners/0002186/000...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4822</th>\n",
|
||||
" <td>__utmt</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4823</th>\n",
|
||||
" <td>__utma</td>\n",
|
||||
" <td>219845656.2053000654.1513465816.1513465816.151...</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4824</th>\n",
|
||||
" <td>__utmb</td>\n",
|
||||
" <td>219845656.1.10.1513465816</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4825</th>\n",
|
||||
" <td>__utmc</td>\n",
|
||||
" <td>219845656</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4826</th>\n",
|
||||
" <td>__utmz</td>\n",
|
||||
" <td>219845656.1513465816.1.1.utmcsr=(direct)|utmcc...</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4827</th>\n",
|
||||
" <td>__utmt</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4828</th>\n",
|
||||
" <td>__utma</td>\n",
|
||||
" <td>219845656.2053000654.1513465816.1513465816.151...</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4829</th>\n",
|
||||
" <td>__utmb</td>\n",
|
||||
" <td>219845656.1.10.1513465816</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4830</th>\n",
|
||||
" <td>__utmc</td>\n",
|
||||
" <td>219845656</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4831</th>\n",
|
||||
" <td>__utmz</td>\n",
|
||||
" <td>219845656.1513465816.1.1.utmcsr=(direct)|utmcc...</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4832</th>\n",
|
||||
" <td>__utmt</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4833</th>\n",
|
||||
" <td>__utma</td>\n",
|
||||
" <td>219845656.2053000654.1513465816.1513465816.151...</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4834</th>\n",
|
||||
" <td>path</td>\n",
|
||||
" <td>/</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4835</th>\n",
|
||||
" <td>expires</td>\n",
|
||||
" <td>Mon, 16 Dec 2019 23:10:15 GMT</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4836</th>\n",
|
||||
" <td>domain</td>\n",
|
||||
" <td>pepper.pr.co</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4837</th>\n",
|
||||
" <td>__utmb</td>\n",
|
||||
" <td>219845656.2.9.1513465816</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4838</th>\n",
|
||||
" <td>path</td>\n",
|
||||
" <td>/</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4839</th>\n",
|
||||
" <td>expires</td>\n",
|
||||
" <td>Sat, 16 Dec 2017 23:40:15 GMT</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4840</th>\n",
|
||||
" <td>domain</td>\n",
|
||||
" <td>pepper.pr.co</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4841</th>\n",
|
||||
" <td>__utmc</td>\n",
|
||||
" <td>219845656</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4842</th>\n",
|
||||
" <td>path</td>\n",
|
||||
" <td>/</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4843</th>\n",
|
||||
" <td>domain</td>\n",
|
||||
" <td>pepper.pr.co</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4844</th>\n",
|
||||
" <td>__utmz</td>\n",
|
||||
" <td>219845656.1513465816.1.1.utmcsr=(direct)|utmcc...</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4845</th>\n",
|
||||
" <td>path</td>\n",
|
||||
" <td>/</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4846</th>\n",
|
||||
" <td>expires</td>\n",
|
||||
" <td>Sun, 17 Jun 2018 11:10:15 GMT</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4847</th>\n",
|
||||
" <td>domain</td>\n",
|
||||
" <td>pepper.pr.co</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4848</th>\n",
|
||||
" <td>__utmv</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4849</th>\n",
|
||||
" <td>path</td>\n",
|
||||
" <td>/</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4850</th>\n",
|
||||
" <td>expires</td>\n",
|
||||
" <td>Sat, 16 Dec 2017 23:10:15 GMT</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4851</th>\n",
|
||||
" <td>domain</td>\n",
|
||||
" <td>pepper.pr.co</td>\n",
|
||||
" <td>http://stats.g.doubleclick.net/dc.js</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>4852 rows × 3 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" key value script_url\n",
|
||||
"0 RNLBSERVERID ded6726 https://syndication.exosrv.com/splash.php?idzo...\n",
|
||||
"1 _ga GA1.2.692713596.1513387628 https://syndication.exosrv.com/splash.php?idzo...\n",
|
||||
"2 _gid GA1.2.1540566351.1513387628 https://syndication.exosrv.com/splash.php?idzo...\n",
|
||||
"3 _gat 1 https://syndication.exosrv.com/splash.php?idzo...\n",
|
||||
"4 splash_i false https://syndication.exosrv.com/splash.php?idzo...\n",
|
||||
"5 expires Sun, 16 Dec 2018 01:27:12 GMT https://syndication.exosrv.com/splash.php?idzo...\n",
|
||||
"6 path / https://syndication.exosrv.com/splash.php?idzo...\n",
|
||||
"7 Adshow 1 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"8 z_pro_city s_provice%3Dmixiegenzhou%26s_city%3Dnull http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"9 userProvinceId 1 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"10 userCityId 0 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"11 userCountyId 0 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"12 userLocationId 1 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"13 ip_ck 4c+H5PP1j7QuNjg2MzkyLjE1MTM0MTU0NzQ%3D http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"14 lv 1513415476 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"15 vn 1 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"16 Adshow 1 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"17 z_pro_city s_provice%3Dmixiegenzhou%26s_city%3Dnull http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"18 userProvinceId 1 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"19 userCityId 0 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"20 userCountyId 0 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"21 userLocationId 1 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"22 ip_ck 4c+H5PP1j7QuNjg2MzkyLjE1MTM0MTU0NzQ%3D http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"23 lv 1513415476 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"24 vn 1 http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"25 POSMEDIAID c8de8cfb85858ad6c30636190806b8fc9b43af469b42ff... http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"26 path / http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"27 expires Thu, 30 Nov 2090 18:22:56 GMT http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"28 domain .ydjs.zol.com.cn http://ydjs.zol.com.cn/m.html?mediaid=c8de8cfb...\n",
|
||||
"29 sd 1 https://content.adriver.ru/banners/0002186/000...\n",
|
||||
"... ... ... ...\n",
|
||||
"4822 __utmt 1 http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4823 __utma 219845656.2053000654.1513465816.1513465816.151... http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4824 __utmb 219845656.1.10.1513465816 http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4825 __utmc 219845656 http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4826 __utmz 219845656.1513465816.1.1.utmcsr=(direct)|utmcc... http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4827 __utmt 1 http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4828 __utma 219845656.2053000654.1513465816.1513465816.151... http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4829 __utmb 219845656.1.10.1513465816 http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4830 __utmc 219845656 http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4831 __utmz 219845656.1513465816.1.1.utmcsr=(direct)|utmcc... http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4832 __utmt 1 http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4833 __utma 219845656.2053000654.1513465816.1513465816.151... http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4834 path / http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4835 expires Mon, 16 Dec 2019 23:10:15 GMT http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4836 domain pepper.pr.co http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4837 __utmb 219845656.2.9.1513465816 http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4838 path / http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4839 expires Sat, 16 Dec 2017 23:40:15 GMT http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4840 domain pepper.pr.co http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4841 __utmc 219845656 http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4842 path / http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4843 domain pepper.pr.co http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4844 __utmz 219845656.1513465816.1.1.utmcsr=(direct)|utmcc... http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4845 path / http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4846 expires Sun, 17 Jun 2018 11:10:15 GMT http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4847 domain pepper.pr.co http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4848 __utmv http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4849 path / http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4850 expires Sat, 16 Dec 2017 23:10:15 GMT http://stats.g.doubleclick.net/dc.js\n",
|
||||
"4851 domain pepper.pr.co http://stats.g.doubleclick.net/dc.js\n",
|
||||
"\n",
|
||||
"[4852 rows x 3 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result_cookie = result.loc[(result['symbol'] == \"window.document.cookie\") & (result['value'].str.contains(\"=\"))]\n",
|
||||
"cookiedf = pd.DataFrame(columns=['key', 'value', 'script_url'])\n",
|
||||
"for i, row in result_cookie.iterrows():\n",
|
||||
" for kv in row['value'].split(\";\"):\n",
|
||||
" if \"=\" in kv:\n",
|
||||
" keyValueArr = kv.strip().split(\"=\", maxsplit=1)\n",
|
||||
" cookiedf = cookiedf.append({'key':keyValueArr[0], 'value':keyValueArr[1], 'script_url':row['script_url']}, ignore_index=True) \n",
|
||||
"\n",
|
||||
"cookiedf\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'json': {'i18nextLng': 'en-US'},\n",
|
||||
" 'script_url': 'https://syndication.exosrv.com/splash.php?idzone=1931806&type=3&sub=1'},\n",
|
||||
" {'json': {},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSLocalStorageTest': 'CSLocalStorageTest'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '0',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'https://js.datadome.co/tags.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '0',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://d1m6l9dfulcyw7.cloudfront.net/uxa/a82093cd9cdbf.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://static.criteo.net/js/ld/ld.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://static.criteo.net/js/ld/ld.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://static.criteo.net/js/ld/ld.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://static.criteo.net/js/ld/ld.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://static.criteo.net/js/ld/ld.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://static.criteo.net/js/ld/ld.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://static.criteo.net/js/ld/ld.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'http://static.criteo.net/js/ld/ld.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'https://pmcdn.staticpmrk.com/rakuten-static-deliver/app/397.0.1/20170919/static/front/libraries/ral/ral-1.0.20.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'https://pmcdn.staticpmrk.com/rakuten-static-deliver/app/397.0.1/20170919/static/front/libraries/ral/ral-1.0.20.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'https://pmcdn.staticpmrk.com/rakuten-static-deliver/app/397.0.1/20170919/static/front/libraries/ral/ral-1.0.20.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'https://pmcdn.staticpmrk.com/rakuten-static-deliver/app/397.0.1/20170919/static/front/libraries/ral/ral-1.0.20.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071'},\n",
|
||||
" 'script_url': 'https://pmcdn.staticpmrk.com/rakuten-static-deliver/app/397.0.1/20170919/static/front/libraries/ral/ral-1.0.20.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071',\n",
|
||||
" 'RALROFL': '{\"ltm\":\"2017-12-16 00:47:43\",\"acc\":10,\"cntln\":\"fr\",\"reqc\":\"success\",\"pgt\":\"nav\",\"pgn\":\"nav\",\"aid\":5,\"cp\":{\"rg\":null,\"buyer\":\"2\",\"rsp\":5,\"ekm\":\"\",\"usergenre\":null,\"usertrackinggroup\":null,\"club_status\":\"\"},\"pgl\":\"PC\",\"icategories\":\"jardin,mobilier-de-jardin\",\"cat1\":\"jardin\",\"cat2\":\"mobilier-de-jardin\",\"etype\":\"pv\",\"url\":\"http://www.priceminister.com/nav/jardin_mobilier-de-jardin/f2/Coffre+de+jardin\",\"tid\":\"9c355f85\",\"tzo\":0,\"res\":\"1366x768\",\"jav\":false,\"bln\":\"en-US\",\"ua\":\"Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0\",\"online\":true,\"ver\":\"1.0.20d\",\"rqtime\":1041,\"ldtime\":2446,\"astime\":4157,\"navtype\":0,\"ifr\":0,\"pgid\":\"d61245cc156d369b\",\"cks\":\"4fd32bc9-4a6d-4d63-88b6-bdefd08f3c22\"}'},\n",
|
||||
" 'script_url': 'https://pmcdn.staticpmrk.com/rakuten-static-deliver/app/397.0.1/20170919/static/front/libraries/ral/ral-1.0.20.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071',\n",
|
||||
" 'RALROFL': '{\"ltm\":\"2017-12-16 00:47:43\",\"acc\":10,\"cntln\":\"fr\",\"reqc\":\"success\",\"pgt\":\"nav\",\"pgn\":\"nav\",\"aid\":5,\"cp\":{\"rg\":null,\"buyer\":\"2\",\"rsp\":5,\"ekm\":\"\",\"usergenre\":null,\"usertrackinggroup\":null,\"club_status\":\"\"},\"pgl\":\"PC\",\"icategories\":\"jardin,mobilier-de-jardin\",\"cat1\":\"jardin\",\"cat2\":\"mobilier-de-jardin\",\"etype\":\"pv\",\"url\":\"http://www.priceminister.com/nav/jardin_mobilier-de-jardin/f2/Coffre+de+jardin\",\"tid\":\"9c355f85\",\"tzo\":0,\"res\":\"1366x768\",\"jav\":false,\"bln\":\"en-US\",\"ua\":\"Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0\",\"online\":true,\"ver\":\"1.0.20d\",\"rqtime\":1041,\"ldtime\":2446,\"astime\":4157,\"navtype\":0,\"ifr\":0,\"pgid\":\"d61245cc156d369b\",\"cks\":\"4fd32bc9-4a6d-4d63-88b6-bdefd08f3c22\"}'},\n",
|
||||
" 'script_url': 'https://pmcdn.staticpmrk.com/rakuten-static-deliver/app/397.0.1/20170919/static/front/libraries/ral/ral-1.0.20.js'},\n",
|
||||
" {'json': {'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:CACHE_BUSTING': '\"263385\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_HIT_DATE': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:LAST_VISIT': '\"1513385259\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PAGE_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:PROJECT_ID': '269',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:RECORDING': '\"0\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SCROLL_RATE': '7',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:SESSION_NUMBER': '\"1\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:maps:eventsMetaData:USER_ID': '\"d8151d4d-1a7c-a810-c2dd-5b1cc3a0f286\"',\n",
|
||||
" 'CSStorageData:stores:412876:eventsStore:queues:events:0': '[0,3485,1366,697]',\n",
|
||||
" 'CSStorageData:timeStamp': '1513385259071',\n",
|
||||
" 'RALROFL': '{\"ltm\":\"2017-12-16 00:47:43\",\"acc\":10,\"cntln\":\"fr\",\"reqc\":\"success\",\"pgt\":\"nav\",\"pgn\":\"nav\",\"aid\":5,\"cp\":{\"rg\":null,\"buyer\":\"2\",\"rsp\":5,\"ekm\":\"\",\"usergenre\":null,\"usertrackinggroup\":null,\"club_status\":\"\"},\"pgl\":\"PC\",\"icategories\":\"jardin,mobilier-de-jardin\",\"cat1\":\"jardin\",\"cat2\":\"mobilier-de-jardin\",\"etype\":\"pv\",\"url\":\"http://www.priceminister.com/nav/jardin_mobilier-de-jardin/f2/Coffre+de+jardin\",\"tid\":\"9c355f85\",\"tzo\":0,\"res\":\"1366x768\",\"jav\":false,\"bln\":\"en-US\",\"ua\":\"Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0\",\"online\":true,\"ver\":\"1.0.20d\",\"rqtime\":1041,\"ldtime\":2446,\"astime\":4157,\"navtype\":0,\"ifr\":0,\"pgid\":\"d61245cc156d369b\",\"cks\":\"4fd32bc9-4a6d-4d63-88b6-bdefd08f3c22\"}'},\n",
|
||||
" 'script_url': 'https://pmcdn.staticpmrk.com/rakuten-static-deliver/app/397.0.1/20170919/static/front/libraries/ral/ral-1.0.20.js'},\n",
|
||||
" {'json': {'_at.cww': '{\"value\":true,\"expires\":1513386547738}',\n",
|
||||
" 'at-lojson-cache-xa-4d83f5dd760fecd5': '{\"config\":null,\"perConfig\":{}}',\n",
|
||||
" 'at-rand': '0.6129927146038678',\n",
|
||||
" 'google_experiment_mod': '570',\n",
|
||||
" 'google_pub_config': '{\"sraConfigs\":{\"2\":{\"sraTimeout\":60000},\"4\":{\"sraTimeout\":60000}}}'},\n",
|
||||
" 'script_url': 'https://s7.addthis.com/js/250/addthis_widget.js#pubid=xa-4d83f5dd760fecd5'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"test = result.loc[(result['symbol'] == \"window.localStorage\")]\n",
|
||||
"localStorage = []\n",
|
||||
"for i, row in test.iterrows():\n",
|
||||
" localStorage.append({'json': json.loads(row['value']), 'script_url':row['script_url']})\n",
|
||||
"\n",
|
||||
"localStorage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,206 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# HTTPS and Mixed Content Vulnerability Analysis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook looks at 2 things from the crawl dataset:\n",
|
||||
"1. What percentage of websites use https.\n",
|
||||
"2. How many websites are using mixed content. \n",
|
||||
"\n",
|
||||
"Mixed content is when an HTTPS webpage loads resources, such as JavaScript files, over an insecure HTTP connection."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 101,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.append('..')\n",
|
||||
"from utils import load_data_util"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 102,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"files_to_analyze = 10000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Download webcrawl data from S3 and build a dictionary with webpage urls as keys and HTTP / HTTPS information as values."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = load_data_util.load_random_data(files_to_analyze, False, 42, False)\n",
|
||||
"\n",
|
||||
"result = {}\n",
|
||||
"for index, row in data.iterrows():\n",
|
||||
" # get the url of the webpage that was being crawled and use that as a unique key.\n",
|
||||
" key = row['location']\n",
|
||||
" \n",
|
||||
" if key not in result:\n",
|
||||
" # check if the webpage is using https.\n",
|
||||
" is_https = False\n",
|
||||
" if key.split(\":\")[0] == \"https\":\n",
|
||||
" is_https = True\n",
|
||||
"\n",
|
||||
" result[key] = {\n",
|
||||
" \"is_https\": is_https,\n",
|
||||
" \"http_script_urls\": 0,\n",
|
||||
" \"https_script_urls\": 0\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" # record the number of javascript function calls for the webpage \n",
|
||||
" # whose script url is fetched using http or https.\n",
|
||||
" url_protocol = row[\"script_url\"].split(\"://\")[0]\n",
|
||||
" if url_protocol == \"http\":\n",
|
||||
" result[key]['http_script_urls'] += 1\n",
|
||||
" elif url_protocol == \"https\":\n",
|
||||
" result[key]['https_script_urls'] += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Analyze the collected data to get:\n",
|
||||
"* A count of the number of websites that use https.\n",
|
||||
"* A list of websites that have mixed content."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"urls_of_websites_with_mixed_content_vulnerability = []\n",
|
||||
"number_of_https_websites = 0\n",
|
||||
"x = 0\n",
|
||||
"for key in result:\n",
|
||||
" if result[key]['is_https']:\n",
|
||||
" if result[key]['http_script_urls'] > 0:\n",
|
||||
" urls_of_websites_with_mixed_content_vulnerability.append(key)\n",
|
||||
" number_of_https_websites += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Print out information for the total number of webpages that use HTTPS."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"69.79% (6979/10000) of websites use https.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"percent_of_websites_using_https = round(number_of_https_websites / files_to_analyze * 100, 4)\n",
|
||||
"print(\n",
|
||||
" str(percent_of_websites_using_https) + \"% (\" + \n",
|
||||
" str(number_of_https_websites) + \"/\" + str(files_to_analyze) + \n",
|
||||
" \") of websites use https.\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Print out information collected for the number of webpages that have mixed content."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 107,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.0% (0/10000) of websites have mixed content.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"number_of_websites_vulnerable = len(urls_of_websites_with_mixed_content_vulnerability)\n",
|
||||
"percent_of_websites_vulnerable = number_of_websites_vulnerable / files_to_analyze * 100\n",
|
||||
"print(\n",
|
||||
" str(percent_of_websites_vulnerable) + \"% (\" + \n",
|
||||
" str(number_of_websites_vulnerable) + \"/\" + str(files_to_analyze) + \n",
|
||||
" \") of websites have mixed content.\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"if number_of_websites_vulnerable > 0:\n",
|
||||
" print(\"The following websites have mixed content:\")\n",
|
||||
"for url in urls_of_websites_with_mixed_content_vulnerability:\n",
|
||||
" print(url)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "Python [default]",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
|
||||
# HTTPS and Mixed Content Vulnerability Analysis
|
||||
|
||||
This notebook looks at 2 things from the crawl dataset:
|
||||
1. What percentage of websites use https.
|
||||
2. How many websites are using mixed content.
|
||||
|
||||
Mixed content is when an HTTPS webpage loads resources, such as JavaScript files, over an insecure HTTP connection.
|
||||
|
||||
|
||||
```python
|
||||
import sys
|
||||
sys.path.append('..')
|
||||
from utils import load_data_util
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
files_to_analyze = 10000
|
||||
```
|
||||
|
||||
#### Download webcrawl data from S3 and build a dictionary with webpage urls as keys and HTTP / HTTPS information as values.
|
||||
|
||||
|
||||
```python
|
||||
data = load_data_util.load_random_data(files_to_analyze, False, 42, False)
|
||||
|
||||
result = {}
|
||||
for index, row in data.iterrows():
|
||||
# get the url of the webpage that was being crawled and use that as a unique key.
|
||||
key = row['location']
|
||||
|
||||
if key not in result:
|
||||
# check if the webpage is using https.
|
||||
is_https = False
|
||||
if key.split(":")[0] == "https":
|
||||
is_https = True
|
||||
|
||||
result[key] = {
|
||||
"is_https": is_https,
|
||||
"http_script_urls": 0,
|
||||
"https_script_urls": 0
|
||||
}
|
||||
|
||||
# record the number of javascript function calls for the webpage
|
||||
# whose script url is fetched using http or https.
|
||||
url_protocol = row["script_url"].split("://")[0]
|
||||
if url_protocol == "http":
|
||||
result[key]['http_script_urls'] += 1
|
||||
elif url_protocol == "https":
|
||||
result[key]['https_script_urls'] += 1
|
||||
```
|
||||
|
||||
### Analyze the collected data to get:
|
||||
* A count of the number of websites that use https.
|
||||
* A list of websites that have mixed content.
|
||||
|
||||
|
||||
```python
|
||||
urls_of_websites_with_mixed_content_vulnerability = []
|
||||
number_of_https_websites = 0
|
||||
x = 0
|
||||
for key in result:
|
||||
if result[key]['is_https']:
|
||||
if result[key]['http_script_urls'] > 0:
|
||||
urls_of_websites_with_mixed_content_vulnerability.append(key)
|
||||
number_of_https_websites += 1
|
||||
```
|
||||
|
||||
### Print out information for the total number of webpages that use HTTPS.
|
||||
|
||||
|
||||
```python
|
||||
percent_of_websites_using_https = round(number_of_https_websites / files_to_analyze * 100, 4)
|
||||
print(
|
||||
str(percent_of_websites_using_https) + "% (" +
|
||||
str(number_of_https_websites) + "/" + str(files_to_analyze) +
|
||||
") of websites use https."
|
||||
)
|
||||
```
|
||||
|
||||
69.79% (6979/10000) of websites use https.
|
||||
|
||||
|
||||
### Print out information collected for the number of webpages that have mixed content.
|
||||
|
||||
|
||||
```python
|
||||
number_of_websites_vulnerable = len(urls_of_websites_with_mixed_content_vulnerability)
|
||||
percent_of_websites_vulnerable = number_of_websites_vulnerable / files_to_analyze * 100
|
||||
print(
|
||||
str(percent_of_websites_vulnerable) + "% (" +
|
||||
str(number_of_websites_vulnerable) + "/" + str(files_to_analyze) +
|
||||
") of websites have mixed content."
|
||||
)
|
||||
|
||||
if number_of_websites_vulnerable > 0:
|
||||
print("The following websites have mixed content:")
|
||||
for url in urls_of_websites_with_mixed_content_vulnerability:
|
||||
print(url)
|
||||
```
|
||||
|
||||
0.0% (0/10000) of websites have mixed content.
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
import pandas as pd
|
||||
|
||||
|
||||
def make_entity_list_df(
        source="https://raw.githubusercontent.com/disconnectme/disconnect-tracking-protection/master/services.json"):
    """Create df from disconnectme entity list.

    Keyword arguments:
    source -- URL or file path of a services.json-style document, handed to
              pd.read_json (default: the disconnectme list on GitHub)

    Output:
    df indexed by entity name with two columns: "resources" (the value stored
    under the entity's first key, eg. ['reddit.com']) and "count" (set to 0).
    One row is produced per entity entry, in document order; an empty
    document yields an empty df with the same two columns.
    """
    entity_list = pd.read_json(source)

    names = []
    resources = []
    for category in entity_list["categories"]:  # Entities of one category, eg. "Advertising"
        for entity in category:  # Entity eg. {'reddit': {'http://www.reddit.com/': ['reddit.com']}}
            name = next(iter(entity))
            # The first key of the entity is its url; the value under it becomes
            # the "resources" cell, so every row shares a single column.
            url = next(iter(entity[name]))
            names.append(name)
            resources.append(entity[name][url])

    # Built in one step (rather than one frame per entity) and without
    # modifying the parsed document.
    return pd.DataFrame({"resources": resources, "count": 0}, index=names)
|
||||
|
||||
|
||||
def sample_random_files(files, entities=None):
    """Produce statistics for subset of files

    Keyword arguments:
    files -- df of random files produced by load_random_data(); the
             "location" and "script_url" columns are read
    entities -- optional df shaped like the output of make_entity_list_df()
                (a "resources" column holding a list of domains per row);
                fetched with make_entity_list_df() when omitted

    Output:
    df containing the entities with count > 0, where count is the number of
    unique (location, script_url) calls whose script host contains one of the
    entity's domains, and calledFrom lists the locations of those calls
    """
    if entities is None:
        entities = make_entity_list_df()
    # Work on a copy so the caller's frame is never modified.
    result = entities.copy()

    # Remove rows where one location calls the same script url (ie. only keep unique calls).
    uniquecalls = files.drop_duplicates(subset=["location", "script_url"], keep="last")

    # Statistics are tallied per row *position*: index labels (entity names) may
    # repeat, which makes label-based cell access ambiguous. Each row also gets
    # its own list, so appending to one row never leaks into another.
    entity_sites = list(result["resources"])
    counts = [0] * len(entity_sites)
    called_from = [[] for _ in entity_sites]

    for _, row_call in uniquecalls.iterrows():
        url = row_call["script_url"]
        if not isinstance(url, str):
            # Missing values (eg. NaN) cannot be matched against a domain.
            continue
        if "//" in url:
            # Isolate the host: the text between "//" and the next "/".
            url = url.split("//")[1].split("/")[0]
        else:
            print("irregular script_url: ", url)

        # Credit the call to the first entity owning a domain that occurs in
        # the host, then stop: a call is counted at most once.
        for position, sites in enumerate(entity_sites):
            if any(site in url for site in sites):
                called_from[position].append(row_call["location"])
                counts[position] += 1
                print("match found! ", "script_url: ", row_call["script_url"],
                      "calledFrom: ", row_call["location"])
                break

    result["count"] = counts
    # An explicit object array keeps one list per cell whatever the list lengths.
    result["calledFrom"] = pd.Series(called_from, dtype=object).values
    return result[result["count"] > 0]
|
||||
|
||||
|
||||
# Module-level entity list, built as soon as this module is imported or run.
# NOTE: make_entity_list_df() calls pd.read_json on a remote URL, so this line
# performs a network fetch at import time.
el = make_entity_list_df()
|
|
@ -0,0 +1,115 @@
|
|||
symbol,operation,N
|
||||
window.Storage.getItem,call,4254
|
||||
window.Storage.setItem,call,1698
|
||||
window.Storage.removeItem,call,1224
|
||||
CanvasRenderingContext2D.fillText,call,224
|
||||
HTMLCanvasElement.getContext,call,171
|
||||
window.Storage.key,call,110
|
||||
CanvasRenderingContext2D.measureText,call,60
|
||||
CanvasRenderingContext2D.createRadialGradient,call,59
|
||||
HTMLCanvasElement.toDataURL,call,51
|
||||
CanvasRenderingContext2D.fillRect,call,46
|
||||
window.Storage.hasOwnProperty,call,33
|
||||
CanvasRenderingContext2D.fill,call,31
|
||||
CanvasRenderingContext2D.arc,call,22
|
||||
CanvasRenderingContext2D.getImageData,call,20
|
||||
CanvasRenderingContext2D.createImageData,call,17
|
||||
CanvasRenderingContext2D.putImageData,call,17
|
||||
CanvasRenderingContext2D.bezierCurveTo,call,12
|
||||
CanvasRenderingContext2D.rect,call,10
|
||||
CanvasRenderingContext2D.stroke,call,9
|
||||
CanvasRenderingContext2D.createLinearGradient,call,9
|
||||
CanvasRenderingContext2D.save,call,8
|
||||
CanvasRenderingContext2D.restore,call,8
|
||||
HTMLCanvasElement.getAttribute,call,7
|
||||
HTMLCanvasElement.addEventListener,call,6
|
||||
RTCPeerConnection.createDataChannel,call,4
|
||||
RTCPeerConnection.createOffer,call,4
|
||||
RTCPeerConnection.setLocalDescription,call,4
|
||||
CanvasRenderingContext2D.isPointInPath,call,3
|
||||
CanvasRenderingContext2D.clip,call,2
|
||||
window.Storage.clear,call,1
|
||||
HTMLCanvasElement.getBoundingClientRect,call,1
|
||||
AudioContext.createOscillator,call,1
|
||||
window.document.cookie,get,13490
|
||||
window.navigator.userAgent,get,6738
|
||||
window.localStorage,get,4067
|
||||
window.sessionStorage,get,1481
|
||||
window.name,get,1224
|
||||
window.navigator.plugins[Shockwave Flash].description,get,852
|
||||
window.screen.colorDepth,get,721
|
||||
window.navigator.appName,get,588
|
||||
window.navigator.language,get,557
|
||||
window.navigator.platform,get,529
|
||||
window.navigator.plugins[Shockwave Flash].name,get,483
|
||||
window.navigator.cookieEnabled,get,352
|
||||
window.navigator.appVersion,get,283
|
||||
window.navigator.vendor,get,256
|
||||
HTMLCanvasElement.offsetWidth,get,173
|
||||
HTMLCanvasElement.offsetHeight,get,173
|
||||
HTMLCanvasElement.offsetTop,get,171
|
||||
HTMLCanvasElement.offsetLeft,get,171
|
||||
window.navigator.doNotTrack,get,153
|
||||
window.navigator.product,get,139
|
||||
window.navigator.plugins[Shockwave Flash].filename,get,135
|
||||
window.Storage.length,get,122
|
||||
window.navigator.mimeTypes[application/x-shockwave-flash].type,get,117
|
||||
window.navigator.languages,get,102
|
||||
window.screen.pixelDepth,get,85
|
||||
window.navigator.plugins[Shockwave Flash].version,get,75
|
||||
window.navigator.plugins[Shockwave Flash].length,get,69
|
||||
window.navigator.mimeTypes[application/futuresplash].type,get,66
|
||||
window.navigator.mimeTypes[application/x-shockwave-flash].suffixes,get,43
|
||||
window.navigator.mimeTypes[application/futuresplash].suffixes,get,43
|
||||
window.navigator.productSub,get,41
|
||||
window.navigator.mimeTypes[application/x-shockwave-flash].description,get,38
|
||||
window.navigator.mimeTypes[application/futuresplash].description,get,38
|
||||
window.navigator.oscpu,get,34
|
||||
window.navigator.onLine,get,32
|
||||
window.navigator.geolocation,get,32
|
||||
HTMLCanvasElement.style,get,31
|
||||
HTMLCanvasElement.height,get,31
|
||||
window.navigator.appCodeName,get,28
|
||||
HTMLCanvasElement.width,get,28
|
||||
window.navigator.buildID,get,20
|
||||
window.navigator.vendorSub,get,19
|
||||
RTCPeerConnection.localDescription,get,6
|
||||
RTCPeerConnection.remoteDescription,get,6
|
||||
RTCPeerConnection.signalingState,get,6
|
||||
RTCPeerConnection.iceGatheringState,get,6
|
||||
RTCPeerConnection.onicecandidate,get,6
|
||||
HTMLCanvasElement.nodeName,get,4
|
||||
RTCPeerConnection.idpLoginUrl,get,3
|
||||
RTCPeerConnection.peerIdentity,get,3
|
||||
RTCPeerConnection.onremovestream,get,3
|
||||
HTMLCanvasElement.nodeType,get,3
|
||||
HTMLCanvasElement.className,get,3
|
||||
HTMLCanvasElement.tagName,get,2
|
||||
HTMLCanvasElement.firstElementChild,get,2
|
||||
HTMLCanvasElement.firstChild,get,2
|
||||
HTMLCanvasElement.nextElementSibling,get,2
|
||||
OscillatorNode.frequency,get,2
|
||||
CanvasRenderingContext2D.globalCompositeOperation,get,1
|
||||
HTMLCanvasElement.parentNode,get,1
|
||||
HTMLCanvasElement.clientTop,get,1
|
||||
HTMLCanvasElement.clientLeft,get,1
|
||||
HTMLCanvasElement.localName,get,1
|
||||
HTMLCanvasElement.attributes,get,1
|
||||
HTMLCanvasElement.childNodes,get,1
|
||||
window.document.cookie,set,3653
|
||||
window.name,set,204
|
||||
CanvasRenderingContext2D.fillStyle,set,163
|
||||
CanvasRenderingContext2D.font,set,161
|
||||
HTMLCanvasElement.height,set,47
|
||||
HTMLCanvasElement.width,set,47
|
||||
CanvasRenderingContext2D.textBaseline,set,36
|
||||
CanvasRenderingContext2D.shadowColor,set,20
|
||||
CanvasRenderingContext2D.shadowBlur,set,20
|
||||
CanvasRenderingContext2D.shadowOffsetX,set,20
|
||||
CanvasRenderingContext2D.shadowOffsetY,set,20
|
||||
CanvasRenderingContext2D.strokeStyle,set,15
|
||||
CanvasRenderingContext2D.lineWidth,set,14
|
||||
CanvasRenderingContext2D.globalCompositeOperation,set,13
|
||||
RTCPeerConnection.onicecandidate,set,8
|
||||
HTMLCanvasElement.requestPointerLock,set,1
|
||||
CanvasRenderingContext2D.lineJoin,set,1
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,940 @@
|
|||
|
||||
|
||||
```python
|
||||
import boto3
|
||||
import botocore
|
||||
import json
|
||||
import pandas as pd
|
||||
import utils.load_data_util
|
||||
|
||||
# Pandas Display Settings to allow the dataframe to display in one view
|
||||
pd.set_option('display.max_columns', 500)
|
||||
pd.set_option('display.expand_frame_repr', False)
|
||||
pd.set_option('display.max_rows', 50000)
|
||||
s3 = boto3.resource('s3')
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
# Helper function to trim the json files into a proper json format
|
||||
def process_string(data):
    """Swap the first and last character of *data* for "[" and "]".

    Everything between the first and the last character is kept unchanged;
    inputs shorter than two characters yield "[]".
    """
    inner = data[1:-1]
    return "".join(("[", inner, "]"))
|
||||
|
||||
# Helper function to count the occurrences of each value of a given key
|
||||
def count_key(data, key, key_value_count):
    """Tally how often each value of *key* occurs across the records in *data*.

    The tallies are added in place to the *key_value_count* mapping
    (value -> number of occurrences); nothing is returned.
    """
    for record in data:
        value = record[key]
        if value in key_value_count:
            key_value_count[value] += 1
        else:
            key_value_count[value] = 1
|
||||
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
result = utils.load_data_util.load_random_data(50)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
unique_args = result.arguments.unique()
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
# Dump every distinct `arguments` value to uniqueArgs.txt, one per line, UTF-8
# encoded. `count` ends up holding the number of values written (0 if none).
count = 0
with open("uniqueArgs.txt", "wb") as f:
    for count, arg in enumerate(unique_args, start=1):
        f.write("".join((str(arg), "\n")).encode("utf-8"))
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
grouped_by_symbol = result.groupby(['symbol']).count()
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
grouped_by_symbol
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
<th></th>
|
||||
<th>arguments</th>
|
||||
<th>call_stack</th>
|
||||
<th>crawl_id</th>
|
||||
<th>file_number</th>
|
||||
<th>func_name</th>
|
||||
<th>in_iframe</th>
|
||||
<th>location</th>
|
||||
<th>operation</th>
|
||||
<th>script_col</th>
|
||||
<th>script_line</th>
|
||||
<th>script_loc_eval</th>
|
||||
<th>script_url</th>
|
||||
<th>time_stamp</th>
|
||||
<th>value</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>symbol</th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>CanvasRenderingContext2D.fillRect</th>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>CanvasRenderingContext2D.fillStyle</th>
|
||||
<td>0</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>CanvasRenderingContext2D.textBaseline</th>
|
||||
<td>0</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>HTMLCanvasElement.getContext</th>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>HTMLCanvasElement.height</th>
|
||||
<td>0</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>HTMLCanvasElement.style</th>
|
||||
<td>0</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>HTMLCanvasElement.width</th>
|
||||
<td>0</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>RTCPeerConnection.iceGatheringState</th>
|
||||
<td>0</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>RTCPeerConnection.idpLoginUrl</th>
|
||||
<td>0</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>RTCPeerConnection.localDescription</th>
|
||||
<td>0</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>RTCPeerConnection.onicecandidate</th>
|
||||
<td>0</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>RTCPeerConnection.onremovestream</th>
|
||||
<td>0</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>RTCPeerConnection.peerIdentity</th>
|
||||
<td>0</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>RTCPeerConnection.remoteDescription</th>
|
||||
<td>0</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>RTCPeerConnection.signalingState</th>
|
||||
<td>0</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.Storage.getItem</th>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
<td>182</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.Storage.key</th>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.Storage.length</th>
|
||||
<td>0</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.Storage.removeItem</th>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
<td>35</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.Storage.setItem</th>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
<td>49</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.document.cookie</th>
|
||||
<td>0</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
<td>479</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.localStorage</th>
|
||||
<td>0</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
<td>94</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.name</th>
|
||||
<td>0</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
<td>31</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.appCodeName</th>
|
||||
<td>0</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.appName</th>
|
||||
<td>0</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
<td>20</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.appVersion</th>
|
||||
<td>0</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.cookieEnabled</th>
|
||||
<td>0</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
<td>14</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.language</th>
|
||||
<td>0</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
<td>21</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.mimeTypes[application/futuresplash].type</th>
|
||||
<td>0</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
<td>4</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.mimeTypes[application/x-shockwave-flash].type</th>
|
||||
<td>0</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
<td>3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.onLine</th>
|
||||
<td>0</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.platform</th>
|
||||
<td>0</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
<td>23</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.plugins[Shockwave Flash].description</th>
|
||||
<td>0</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
<td>39</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.plugins[Shockwave Flash].filename</th>
|
||||
<td>0</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.plugins[Shockwave Flash].length</th>
|
||||
<td>0</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
<td>9</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.plugins[Shockwave Flash].name</th>
|
||||
<td>0</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
<td>10</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.plugins[Shockwave Flash].version</th>
|
||||
<td>0</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.product</th>
|
||||
<td>0</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.productSub</th>
|
||||
<td>0</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.userAgent</th>
|
||||
<td>0</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
<td>258</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.vendor</th>
|
||||
<td>0</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
<td>7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.navigator.vendorSub</th>
|
||||
<td>0</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.screen.colorDepth</th>
|
||||
<td>0</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
<td>22</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.screen.pixelDepth</th>
|
||||
<td>0</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
<td>5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>window.sessionStorage</th>
|
||||
<td>0</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
<td>65</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
```python
|
||||
result.corr()
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
<th></th>
|
||||
<th>crawl_id</th>
|
||||
<th>file_number</th>
|
||||
<th>in_iframe</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>crawl_id</th>
|
||||
<td>NaN</td>
|
||||
<td>NaN</td>
|
||||
<td>NaN</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>file_number</th>
|
||||
<td>NaN</td>
|
||||
<td>1.000000</td>
|
||||
<td>0.137485</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>in_iframe</th>
|
||||
<td>NaN</td>
|
||||
<td>0.137485</td>
|
||||
<td>1.000000</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,116 @@
|
|||
* __call_stack:__
|
||||
* __Type:__ String
|
||||
* __Description:__ The call stack at the point when the function is called. The output is in the format: (function_name)(@)(javascript_source_file)(:)(line_number)(column_number)(new_line_character)
|
||||
* __Example:__
|
||||
```
|
||||
jQuery.cookie@https://cdn.livechatinc.com/js/embedded.20171215135707.js:5:8393\nStore</s.get@https://cdn.livechatinc.com/js/embedded.20171215135707.js:8:3323\nStore</</s[p]@https://cdn.livechatinc.com/js/embedded.20171215135707.js:8:3746\nWindowsCommunicator.prototype.startCheckingForMainWindow/e<@https://cdn.livechatinc.com/js/embedded.20171215135707.js:10:11730
|
||||
```
|
||||
* __crawl_id:__
|
||||
* __Type:__ Integer
|
||||
* __Description:__ Crawl_id appears to be the value 1 for all json files. It is possible this field was not used when generating the data using the crawler.
|
||||
* __Example:__ 1
|
||||
* __func_name:__
|
||||
* __Type:__ String
|
||||
* __Description:__ The name of the javascript function. Due to obfuscation the function names are often nonsensical and thus can be thought of as tokens. Anonymous functions do not have a name, so the value will be an empty string.
|
||||
* __Examples:__
|
||||
```
|
||||
""
|
||||
a<4k
|
||||
getName
|
||||
```
|
||||
* __in_iframe:__
|
||||
* __Type:__ boolean
|
||||
* __Description:__ in_iframe is a boolean that indicates that the javascript code was run inside of an iframe. This is new functionality that was added on top of the original OpenWPM repository.
|
||||
* __location:__
|
||||
* __Type:__ string
|
||||
* __Description:__ The url of the file that was being crawled to generate the json file. All objects in a json file should have the same location value. The url can be for any type of file such as .html, .js or have no file extension.
|
||||
* __Examples:__
|
||||
```
|
||||
https://www.dresslily.com/bottom-c-36.html
|
||||
http://www.vidalfrance.com/component/forme/?fid=2
|
||||
```
|
||||
* __operation:__
|
||||
* __Type:__ string
|
||||
* __Description:__ Corresponds to the "symbol" field. Operation is a call if the symbol is a method. Get/set operations get and set symbols that are properties with values.
|
||||
* __Possible Values:__ get, call, set
|
||||
* __script_col:__
|
||||
* __Type:__ string
|
||||
* __Description:__ The column in the `script_line` where the function call starts. Note: currently some strings do not contain numbers, but instead contain urls, such as in the example below.
|
||||
* __Examples:__
|
||||
```
|
||||
57
|
||||
211
|
||||
//hdjs.hiido.com/hiido_internal.js?siteid=mhssj
|
||||
```
|
||||
* __script_line:__
|
||||
* __Type:__ string
|
||||
* __Description:__ The line in the file, indicated in the above `location` element, where the function call is located. Note: currently some strings do not contain numbers, but instead contain the protocol identifier for a url, such as in the example below.
|
||||
* __Examples:__
|
||||
```
|
||||
12
|
||||
129
|
||||
http
|
||||
https
|
||||
```
|
||||
* __script_loc_eval:__
|
||||
* __Type:__ string
|
||||
* __Description:__ If a function call is generated using the `eval()` function, or is created using `new Function()`, then the "script_loc_eval" value will be set. For example `eval("console.log('my message')")` or `var log = new Function("message", "console.log(message)"); log("my message");` will both cause the "script_loc_eval" value to be set when the function calls are collected. The format of "script_loc_eval" is: (line) (LINE_NUMBER) (>) (eval | Function) and can be repeated multiple times. Additional information on how the eval line number is generated can be found at the bottom of the [MDN page](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Error/Stack) which discusses the `Error` object's `stack` property. The "script_loc_eval" element is generated from this stack property.
|
||||
* __Examples:__
|
||||
```
|
||||
""
|
||||
line 2 > eval
|
||||
line 70 > Function
|
||||
line 140 > eval line 232 > Function
|
||||
line 1 > Function line 1 > eval line 1 > eval
|
||||
```
|
||||
* __script_url:__
|
||||
* __Type:__ string
|
||||
* __Description:__ The url of the file where the javascript function call was run. This may be the same value as "location", or it may be an external web url that was loaded into the website with the use of the `<script>` tag.
|
||||
* __Examples:__
|
||||
```
|
||||
http://www.google-analytics.com/analytics.js
|
||||
http://ajax.googleapis.com/ajax/libs/jquery/1.6/jquery.min.js
|
||||
http://pw.myersinfosys.com/javascripts/jquery-cookie.js?rwdv2
|
||||
https://g.alicdn.com/alilog/oneplus/blk.html#coid=52m7EjiWaj8CASPiP1nwaYXC&noid=&grd=n
|
||||
```
|
||||
* __symbol:__
|
||||
* __Type:__ string
|
||||
* __Description:__ Either a Web API interface property (with a value) or method (which may take args as listed in "arguments" field). Symbol corresponds to "operation" field.
|
||||
* __Examples:__
|
||||
```
|
||||
window.Storage.getItem
|
||||
window.navigator.userAgent
|
||||
CanvasRenderingContext2D.textBaseline
|
||||
```
|
||||
* __time_stamp:__
|
||||
* __Type:__ string
|
||||
* __Description:__ The timestamp of when the javascript function information was collected. The timestamp is collected using JavaScript's Date.now() function. It is in the format YYYY-MM-DDTHH:mm:ss.sssZ.
|
||||
* YYYY-MM-DD is the: year-month-day.
|
||||
* "T" is a delimiter to separate the two sections.
|
||||
* HH:mm:ss.sss represents the: hours, minutes, seconds, and milliseconds.
|
||||
* Z is optional and denotes the time zone. Z represents the time zone UTC+0.
|
||||
* __Examples:__
|
||||
```
|
||||
2017-12-16T00:17:37.973Z
|
||||
2017-12-16T00:24:09.355Z
|
||||
2017-12-16T08:10:24.749Z
|
||||
```
|
||||
* __value:__
|
||||
* __Type:__ string
|
||||
* __Description:__ The value that the function returned.
|
||||
* __Examples:__
|
||||
```
|
||||
""
|
||||
{}
|
||||
Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0
|
||||
\_ga=GA1.2.1076416180.1513383458; \_gid=GA1.2.1940452730.1513383458
|
||||
{"name": "example", "Browser": "Mozilla/5.0"}
|
||||
```
|
||||
* __arguments:__
|
||||
* __Type:__ object
|
||||
* __Description:__ Optional property which lists the arguments taken by the method in "symbol" field.
|
||||
* __Examples:__
|
||||
```
|
||||
{\"0\":\"liveAgentPc\"}
|
||||
{\"0\":\"liveAgentPage_0\",\"1\":\"http://www.alamy.com/help/what-is-model-release-property-release.aspx\"}
|
||||
```
|
|
@ -0,0 +1,191 @@
|
|||
import boto3
|
||||
import botocore
|
||||
import json
|
||||
import pandas as pd
|
||||
import requests
|
||||
import random
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Name of the S3 bucket every loader in this module reads from.
BUCKET_NAME = "safe-ucosp-2017"

# Widen the pandas console output so wide frames are not truncated or wrapped.
pd.set_option('display.max_columns', 500)
pd.set_option('display.expand_frame_repr', False)

# Shared boto3 handles used by the functions below.
s3 = boto3.resource('s3')
bucket = s3.Bucket(BUCKET_NAME)

# The project root is taken to be the parent of the directory holding this file;
# downloaded S3 objects are cached in <project root>/cache.
this_files_directory = os.path.dirname(os.path.realpath(__file__))
project_root_directory = os.path.join(this_files_directory, "..")
cache_file_directory = os.path.join(project_root_directory, "cache")

# exist_ok=True replaces the previous "check, then create" sequence, which could
# fail if another process created the directory between the two calls.
os.makedirs(cache_file_directory, exist_ok=True)
|
||||
|
||||
def load_data(number_of_files, to_output_csv=True, cache_s3_data=False):
    """Load the files from the beginning of the bucket

    Keyword arguments:
    number_of_files -- number of files to load
    to_output_csv -- boolean to save the result of the data into a csv file called result.csv (default True)
    cache_s3_data -- boolean to cache each file once downloaded in a json file to speed up future data loads. (default False)

    Returns one DataFrame made by concatenating the per-file DataFrames. Files
    are numbered from 1 in the order the bucket yields them. If no file was
    loaded, an empty DataFrame is returned.
    """
    # Take the first number_of_files objects and turn each one into a DataFrame.
    frames = [
        load_data_to_dataframe(bucket_data_object, file_number, cache_s3_data)
        for file_number, bucket_data_object
        in enumerate(bucket.objects.limit(number_of_files), start=1)
    ]

    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # frame when nothing was loaded (e.g. number_of_files == 0).
    result = pd.concat(frames) if frames else pd.DataFrame()

    # Output the results to a csv (relative to the working directory) if desired
    if to_output_csv:
        result.to_csv('result.csv', header=True, index=False, encoding='utf-8')
    return result
|
||||
|
||||
|
||||
def transform_into_dataframe(data, file_number):
    """Process the data, read it into a pandas DataFrame and add a column for file number

    Keyword arguments:
    data -- the string containing the file data (JSON text)
    file_number -- the file number the data came from

    Returns the DataFrame parsed from data with an extra 'file_number' column
    set to file_number on every row.
    """
    from io import StringIO

    # Recent pandas versions no longer accept literal JSON text directly in
    # read_json (a bare string is treated as a path/URL), so wrap the text in a
    # file-like object. Non-string inputs are passed through untouched.
    source = StringIO(data) if isinstance(data, str) else data

    frame = pd.read_json(source)
    frame['file_number'] = file_number
    return frame
|
||||
|
||||
|
||||
def process_string(data):
    """Replace the first and last character of data with square brackets

    Keyword arguments:
    data -- the string to re-wrap

    The first and the last character are dropped and what remains is wrapped in
    "[" and "]". Strings shorter than two characters therefore become "[]".
    NOTE(review): presumably the raw S3 payload is wrapped in some other pair of
    delimiters -- confirm against the bucket contents.
    """
    inner = data[1:-1]
    return "".join(["[", inner, "]"])
|
||||
|
||||
|
||||
def create_file_index():
    """Function used to create the file index file

    Writes the key of every object in the bucket, one per line, to
    file_index.json in the current working directory and prints a running count
    after every 1000 keys.
    NOTE(review): the file is opened in append mode, so running this twice
    appends the keys a second time -- confirm that this is intended.
    """
    with open("file_index.json", "a+") as index_file:
        for written, bucket_object in enumerate(bucket.objects.all(), start=1):
            index_file.write(bucket_object.key + "\n")
            # progress report
            if written % 1000 == 0:
                print(written)
|
||||
|
||||
|
||||
def validate_file_fetch():
    """Function used to validate the file index has unique entries

    Reads file_index.json from the current working directory and returns True
    when no line occurs more than once, False otherwise. Lines are compared
    verbatim, including their line endings.
    """
    with open("file_index.json") as index_file:
        entries = index_file.readlines()

    # a set drops duplicates, so the sizes only match when every line is unique
    return len(set(entries)) == len(entries)
|
||||
|
||||
|
||||
def fetch_file(file_url, file_name, mode):
    """Fetch file from base_url and store it in the project root directory.

    Keyword arguments:
    file_url -- url of where the file is (file_name is appended to it as-is)
    file_name -- name of the file to be saved as
    mode -- mode of the file to open it in (for example: wb+); the response body
            is written as bytes, so this should be a binary mode

    Raises requests.HTTPError when the server answers with a 4xx/5xx status.
    """
    response = requests.get(file_url + file_name)

    # Fail loudly on an error status instead of saving the error page to disk,
    # where it would later be mistaken for the real file.
    response.raise_for_status()

    file_path = os.path.join(project_root_directory, file_name)
    with open(file_path, mode) as f:
        f.write(response.content)
|
||||
|
||||
|
||||
def load_index_file():
    """Load the file containing all indexes of the bucket and return it as an array of indexes

    The index is read from file_index.txt in the project root directory; it is
    downloaded first if it is not in local storage. The returned lines still
    carry their trailing newline characters.
    """
    index_name = "file_index.txt"
    index_path = os.path.join(project_root_directory, index_name)

    # download the index on first use
    index_missing = not os.path.isfile(index_path)
    if index_missing:
        fetch_file("http://www.arewedatayet.com/", index_name, 'wb+')

    with open(index_path, "r") as index_file:
        return index_file.readlines()
|
||||
|
||||
|
||||
def load_random_data(number_of_files, to_output_csv=True, seed=None, cache_s3_data=False):
    """Load random files from the bucket

    Keyword arguments:
    number_of_files -- number of files to load
    to_output_csv -- boolean to save the result of the data into a csv file called result.csv (default True)
    seed -- seed for generating random samples (default None)
    cache_s3_data -- boolean to cache each file once downloaded in a json file to speed up future data loads. (default False)

    Returns one DataFrame made by concatenating the per-file DataFrames. Files
    are numbered from 1 in the order they were sampled. If number_of_files is 0
    an empty DataFrame is returned. random.sample raises ValueError when
    number_of_files is larger than the number of entries in the index.
    """
    lines = load_index_file()

    # Seed the module-level generator so the same seed picks the same files
    random.seed(seed)
    # Get number_of_files random keys from the index
    samples = random.sample(lines, number_of_files)

    # Download each sampled key and transform it into a DataFrame
    frames = []
    for file_number, sample in enumerate(samples, start=1):
        # index lines end with a newline that is not part of the S3 key
        bucket_data_object = s3.ObjectSummary(BUCKET_NAME, sample.strip())
        frames.append(
            load_data_to_dataframe(bucket_data_object, file_number, cache_s3_data)
        )

    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # frame when nothing was sampled.
    result = pd.concat(frames) if frames else pd.DataFrame()

    # Output the results to a csv (relative to the working directory) if desired
    if to_output_csv:
        result.to_csv('result.csv', header=True, index=False, encoding='utf-8')
    return result
|
||||
|
||||
|
||||
def get_json_data(bucket_data_object, cache_s3_data):
    """Get the json data from either the given s3 bucket or the cache.

    Keyword arguments:
    bucket_data_object -- a boto3 ObjectSummary class that is connected to the json data we want to load
    cache_s3_data -- boolean which determines if we cache the downloaded data in a file in the cache directory

    Returns the data as a JSON string. A cached copy, when present, always takes
    precedence over downloading, regardless of cache_s3_data.
    """
    file_name = os.path.join(cache_file_directory, bucket_data_object.key)

    if os.path.isfile(file_name):  # get data from the cache file
        with open(file_name) as cached_file:
            json_data = json.load(cached_file)
        # re-serialise so that both branches hand back a JSON string
        return json.dumps(json_data)

    # get data from S3
    bucket_json_data = bucket_data_object.get()

    # data is in a byte format so we must decode it to utf-8.
    json_data = bucket_json_data['Body'].read().decode("utf-8")
    json_data = process_string(json_data)

    if cache_s3_data:
        # Pretty print the json data. This is done BEFORE the cache file is
        # opened so that a parse failure cannot leave an empty cache file
        # behind, which would break every later load of this key.
        json_dump = json.dumps(json.loads(json_data), sort_keys=True, indent=2)

        # make sure the target directory exists, in case the key contains
        # path separators
        os.makedirs(os.path.dirname(file_name), exist_ok=True)
        with open(file_name, "w") as data_file:
            data_file.write(json_dump)

    return json_data
|
||||
|
||||
def load_data_to_dataframe(bucket_data_object, file_number, cache_s3_data):
    """Fetch the json data behind the given bucket object and turn it into a DataFrame

    Keyword arguments:
    bucket_data_object -- a boto3 ObjectSummary class that is connected to the json data we want to load
    file_number -- file number to be attached to the DataFrame
    cache_s3_data -- boolean passed on to get_json_data to control caching of the downloaded data
    """
    return transform_into_dataframe(
        get_json_data(bucket_data_object, cache_s3_data),
        file_number,
    )
|
||||
|
||||
def extract_column_json_to_list(series_to_be_processed):
    """Turn each json string of the selected column into the list of its values

    Keyword arguments:
    series_to_be_processed -- a pandas Series that contains strings in the form of json

    Returns a new Series of lists. Entries that are not strings (for example
    missing values) become an empty list.
    """
    def values_of(cell):
        # only strings are parsed; everything else maps to an empty list
        if not isinstance(cell, str):
            return []
        return list(json.loads(cell).values())

    return series_to_be_processed.map(values_of)
|
||||
|
Загрузка…
Ссылка в новой задаче