зеркало из https://github.com/openwpm/OpenWPM.git
Including the browser ID in every call to the logger
This commit is contained in:
Родитель
b8e4668a06
Коммит
9bfc144006
|
@ -69,6 +69,7 @@ class Browser:
|
|||
tempdir = tempfile.mkdtemp() + "/"
|
||||
profile_commands.dump_profile(crashed_profile_path,
|
||||
self.manager_params,
|
||||
self.browser_params,
|
||||
tempdir,
|
||||
close_webdriver=False,
|
||||
browser_settings=self.browser_settings,
|
||||
|
|
|
@ -23,7 +23,8 @@ def execute_command(command, webdriver, proxy_queue, browser_settings, browser_p
|
|||
|
||||
if command[0] == 'DUMP_PROF':
|
||||
profile_commands.dump_profile(browser_params['profile_path'], manager_params,
|
||||
command[1], command[2], webdriver, browser_settings,
|
||||
browser_params command[1], command[2], webdriver,
|
||||
browser_settings,
|
||||
save_flash=browser_params['disable_flash'] is False)
|
||||
|
||||
if command[0] == 'EXTRACT_LINKS':
|
||||
|
|
|
@ -36,7 +36,7 @@ def load_browser_settings(location):
|
|||
browser_settings = None
|
||||
return browser_settings
|
||||
|
||||
def save_flash_files(logger, dump_location, clear=False):
|
||||
def save_flash_files(logger, browser_params, dump_location, clear=False):
|
||||
"""
|
||||
save all files from the default flash storage locations
|
||||
clear: sets whether to clear storage locations after backup
|
||||
|
@ -47,10 +47,10 @@ def save_flash_files(logger, dump_location, clear=False):
|
|||
#Copy all flash objects over to dump location
|
||||
for location in FLASH_LOCS:
|
||||
if not os.path.isdir(location):
|
||||
logger.warning("%s not found when attempting to save flash files, skipping..." % location)
|
||||
logger.warning("BROWSER %i: %s not found when attempting to save flash files, skipping..." % (browser_params['crawl_id'], location))
|
||||
continue
|
||||
|
||||
logger.debug("SAVING %s during flash file archive" % location)
|
||||
logger.debug("BROWSER %i: SAVING %s during flash file archive" % (browser_params['crawl_id'], location))
|
||||
(head, tail) = os.path.split(location)
|
||||
|
||||
#Remove old backups if exist
|
||||
|
@ -61,18 +61,18 @@ def save_flash_files(logger, dump_location, clear=False):
|
|||
shutil.copytree(location, os.path.join(dump_location,tail))
|
||||
|
||||
if clear:
|
||||
logger.debug("CLEARING %s during flash file archive" % location)
|
||||
logger.debug("BROWSER %i: CLEARING %s during flash file archive" % (browser_params['crawl_id'], location))
|
||||
rmsubtree(location)
|
||||
|
||||
def load_flash_files(logger, tar_location):
|
||||
def load_flash_files(logger, browser_params, tar_location):
|
||||
""" clear old flash cookies and load ones from dump """
|
||||
#Clear previous objects prior to loading
|
||||
for location in FLASH_LOCS:
|
||||
if not os.path.isdir(location):
|
||||
logger.warning("%s not found when attempting to load flash files, skipping..." % location)
|
||||
logger.warning("BROWSER %i: %s not found when attempting to load flash files, skipping..." % (browser_params['crawl_id'], location))
|
||||
continue
|
||||
|
||||
logger.debug("CLEARING %s before loading flash files" % location)
|
||||
logger.debug("BROWSER %i: CLEARING %s before loading flash files" % (browser_params['crawl_id'], location))
|
||||
shutil.rmtree(location)
|
||||
|
||||
#Copy flash storage objects from tar_location
|
||||
|
@ -80,11 +80,12 @@ def load_flash_files(logger, tar_location):
|
|||
if os.path.exists(os.path.join(tar_location,tail)):
|
||||
shutil.copytree(os.path.join(tar_location,tail),location)
|
||||
else:
|
||||
logger.warning("%s not found while loading flash files, skipping..." % os.path.join(tar_location, tail))
|
||||
logger.warning("BROWSER %i: %s not found while loading flash files, skipping..." % (browser_params['crawl_id'], os.path.join(tar_location, tail)))
|
||||
continue
|
||||
|
||||
def dump_profile(browser_profile_folder, manager_params, tar_location, close_webdriver,
|
||||
webdriver=None, browser_settings=None, save_flash=False, full_profile=True):
|
||||
def dump_profile(browser_profile_folder, manager_params, browser_params, tar_location,
|
||||
close_webdriver, webdriver=None, browser_settings=None, save_flash=False,
|
||||
full_profile=True):
|
||||
"""
|
||||
dumps a browser profile currently stored in <browser_profile_folder> to
|
||||
<tar_location> in which both folders are absolute paths.
|
||||
|
@ -121,7 +122,7 @@ def dump_profile(browser_profile_folder, manager_params, tar_location, close_web
|
|||
# backup and tar profile
|
||||
tar = tarfile.open(tar_location + tar_name, 'w:gz')
|
||||
if full_profile: # backup all storage vectors
|
||||
logger.debug("Backing up full profile from %s to %s" % (browser_profile_folder, tar_location + tar_name))
|
||||
logger.debug("BROWSER %i: Backing up full profile from %s to %s" % (browser_params['crawl_id'], browser_profile_folder, tar_location + tar_name))
|
||||
storage_vector_files = [
|
||||
'cookies.sqlite', 'cookies.sqlite-shm', 'cookies.sqlite-wal', # cookies
|
||||
'places.sqlite', 'places.sqlite-shm', 'places.sqlite-wal', # history
|
||||
|
@ -134,36 +135,36 @@ def dump_profile(browser_profile_folder, manager_params, tar_location, close_web
|
|||
for item in storage_vector_files:
|
||||
full_path = os.path.join(browser_profile_folder, item)
|
||||
if not os.path.isfile(full_path) and full_path[-3:] != 'shm' and full_path[-3:] != 'wal':
|
||||
logger.critical("%s NOT FOUND IN profile folder, skipping." % full_path)
|
||||
logger.critical("BROWSER %i: %s NOT FOUND IN profile folder, skipping." % (browser_params['crawl_id'], full_path))
|
||||
elif not os.path.isfile(full_path) and (full_path[-3:] == 'shm' or full_path[-3:] == 'wal'):
|
||||
continue # These are just checkpoint files
|
||||
tar.add(full_path, arcname=item)
|
||||
for item in storage_vector_dirs:
|
||||
full_path = os.path.join(browser_profile_folder, item)
|
||||
if not os.path.isdir(full_path):
|
||||
logger.warning("%s NOT FOUND IN profile folder, skipping." % full_path)
|
||||
logger.warning("BROWSER %i: %s NOT FOUND IN profile folder, skipping." % (browser_params['crawl_id'], full_path))
|
||||
continue
|
||||
tar.add(full_path, arcname=item)
|
||||
|
||||
else: # only backup cookies and history
|
||||
logger.debug("Backing up limited profile from %s to %s" % (browser_profile_folder, tar_location + tar_name))
|
||||
logger.debug("BROWSER %i: Backing up limited profile from %s to %s" % (browser_params['crawl_id'], browser_profile_folder, tar_location + tar_name))
|
||||
for db in ["cookies.sqlite", "cookies.sqlite-shm", "cookies.sqlite-wal",
|
||||
"places.sqlite", "places.sqlite-shm", "places.sqlite-wal"]:
|
||||
if os.path.isfile(browser_profile_folder + db):
|
||||
logger.debug("Adding %s from profile folder to archive." % full_path)
|
||||
logger.debug("BROWSER %i: Adding %s from profile folder to archive." % (browser_params['crawl_id'], full_path))
|
||||
tar.add(browser_profile_folder + db, arcname=db)
|
||||
tar.close()
|
||||
|
||||
# save flash cookies
|
||||
if save_flash:
|
||||
save_flash_files(logger, tar_location)
|
||||
save_flash_files(logger, browser_params, tar_location)
|
||||
|
||||
# save the browser settings
|
||||
if browser_settings is not None:
|
||||
save_browser_settings(tar_location, browser_settings)
|
||||
|
||||
|
||||
def load_profile(browser_profile_folder, manager_params, tar_location, load_flash=False):
|
||||
def load_profile(browser_profile_folder, manager_params, browser_params, tar_location, load_flash=False):
|
||||
"""
|
||||
loads a zipped cookie-based profile stored in <tar_location> and
|
||||
unzips it to <browser_profile_folder>. This will load whatever profile
|
||||
|
@ -184,7 +185,7 @@ def load_profile(browser_profile_folder, manager_params, tar_location, load_flas
|
|||
tar_name = 'profile.tar.gz'
|
||||
|
||||
# Copy and untar the loaded profile
|
||||
logger.debug("Copying profile tar from %s to %s" % (tar_location+tar_name, browser_profile_folder))
|
||||
logger.debug("BROWSER %i: Copying profile tar from %s to %s" % (browser_params['crawl_id'], tar_location+tar_name, browser_profile_folder))
|
||||
subprocess.call(["cp", tar_location + tar_name, browser_profile_folder])
|
||||
opener, mode = tarfile.open, 'r:gz'
|
||||
f = opener(browser_profile_folder + tar_name, mode)
|
||||
|
@ -194,13 +195,13 @@ def load_profile(browser_profile_folder, manager_params, tar_location, load_flas
|
|||
|
||||
# clear and load flash cookies
|
||||
if load_flash:
|
||||
load_flash_files(logger, tar_location)
|
||||
load_flash_files(logger, browser_params, tar_location)
|
||||
|
||||
# load the browser settings
|
||||
browser_settings = load_browser_settings(tar_location)
|
||||
|
||||
except Exception as ex:
|
||||
logger.error("Error: %s while attempting to load profile" % str(ex))
|
||||
logger.error("BROWSER %i: Error: %s while attempting to load profile" % (browser_params['crawl_id'],str(ex)))
|
||||
browser_settings = None
|
||||
|
||||
return browser_settings
|
||||
|
|
|
@ -32,12 +32,12 @@ def deploy_firefox(status_queue, browser_params, manager_params, crash_recovery)
|
|||
profile_settings = None # Imported browser settings
|
||||
if browser_params['profile_tar'] and not crash_recovery:
|
||||
logger.debug("BROWSER %i: Loading initial browser profile from: %s" % (browser_params['crawl_id'], browser_params['profile_tar']))
|
||||
profile_settings = load_profile(browser_profile_path, manager_params,
|
||||
profile_settings = load_profile(browser_profile_path, manager_params, browser_params,
|
||||
browser_params['profile_tar'],
|
||||
load_flash=browser_params['disable_flash'] is False)
|
||||
elif browser_params['profile_tar']:
|
||||
logger.debug("BROWSER %i: Loading recovered browser profile from: %s" % (browser_params['crawl_id'], browser_params['profile_tar']))
|
||||
profile_settings = load_profile(browser_profile_path, manager_params,
|
||||
profile_settings = load_profile(browser_profile_path, manager_params, browser_params,
|
||||
browser_params['profile_tar'])
|
||||
|
||||
if browser_params['random_attributes'] and profile_settings is None:
|
||||
|
|
|
@ -85,18 +85,18 @@ def save_javascript_content(logger, browser_params, manager_params, msg):
|
|||
try:
|
||||
script = zlib.decompress(msg.response.content, zlib.MAX_WBITS|16)
|
||||
except zlib.error as e:
|
||||
logger.error('Received zlib error when trying to decompress gzipped javascript: %s' % str(e))
|
||||
logger.error('BROWSER %i: Received zlib error when trying to decompress gzipped javascript: %s' % (browser_params['crawl_id'],str(e)))
|
||||
return
|
||||
elif 'deflate' in msg.response.headers['Content-Encoding']:
|
||||
try:
|
||||
script = zlib.decompress(msg.response.content, -zlib.MAX_WBITS)
|
||||
except zlib.error as e:
|
||||
logger.error('Received zlib error when trying to decompress deflated javascript: %s' % str(e))
|
||||
logger.error('BROWSER %i: Received zlib error when trying to decompress deflated javascript: %s' % (browser_params['crawl_id'],str(e)))
|
||||
return
|
||||
elif msg.response.headers['Content-Encoding'] == []:
|
||||
script = msg.response.content
|
||||
else:
|
||||
logger.error('Received Content-Encoding %s. Not supported by Firefox, skipping archive.' % str(msg.response.headers['Content-Encoding']))
|
||||
logger.error('BROWSER %i: Received Content-Encoding %s. Not supported by Firefox, skipping archive.' % (browser_params['crawl_id'], str(msg.response.headers['Content-Encoding'])))
|
||||
return
|
||||
path = os.path.join(manager_params['data_directory'],'javascript_files/')
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче