Including the browser ID in every call to the logger

englehardt 2015-09-21 05:51:31 -07:00
Parent b8e4668a06
Commit 9bfc144006
5 changed files with 29 additions and 26 deletions
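The change is mechanical but consistent across the hunks below: every log call made on behalf of a particular browser now prepends a "BROWSER %i:" prefix built from browser_params['crawl_id'], so interleaved log lines from parallel browser instances can be attributed. A minimal sketch of the pattern, using a stand-in logger and a hypothetical browser_params dict rather than the project's real configuration:

import logging

# Stand-ins for the crawler's real logger and per-browser parameters (hypothetical values).
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("openwpm_sketch")
browser_params = {'crawl_id': 3}

# Before this commit: messages from different browsers are indistinguishable.
logger.debug("SAVING %s during flash file archive" % "/tmp/flash")

# After this commit: every message carries the originating browser's crawl_id.
logger.debug("BROWSER %i: SAVING %s during flash file archive"
             % (browser_params['crawl_id'], "/tmp/flash"))

The same effect could also be obtained centrally with a logging.LoggerAdapter that injects the prefix, but the commit instead threads browser_params through to each call site.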

View file

@@ -69,6 +69,7 @@ class Browser:
 tempdir = tempfile.mkdtemp() + "/"
 profile_commands.dump_profile(crashed_profile_path,
                               self.manager_params,
+                              self.browser_params,
                               tempdir,
                               close_webdriver=False,
                               browser_settings=self.browser_settings,

View file

@@ -23,7 +23,8 @@ def execute_command(command, webdriver, proxy_queue, browser_settings, browser_p
     if command[0] == 'DUMP_PROF':
         profile_commands.dump_profile(browser_params['profile_path'], manager_params,
-                                      command[1], command[2], webdriver, browser_settings,
+                                      browser_params, command[1], command[2], webdriver,
+                                      browser_settings,
                                       save_flash=browser_params['disable_flash'] is False)
     if command[0] == 'EXTRACT_LINKS':

View file

@@ -36,7 +36,7 @@ def load_browser_settings(location):
         browser_settings = None
     return browser_settings
 
-def save_flash_files(logger, dump_location, clear=False):
+def save_flash_files(logger, browser_params, dump_location, clear=False):
     """
     save all files from the default flash storage locations
     clear: sets whether to clear storage locations after backup
@@ -47,10 +47,10 @@ def save_flash_files(logger, dump_location, clear=False):
     #Copy all flash objects over to dump location
     for location in FLASH_LOCS:
         if not os.path.isdir(location):
-            logger.warning("%s not found when attempting to save flash files, skipping..." % location)
+            logger.warning("BROWSER %i: %s not found when attempting to save flash files, skipping..." % (browser_params['crawl_id'], location))
             continue
-        logger.debug("SAVING %s during flash file archive" % location)
+        logger.debug("BROWSER %i: SAVING %s during flash file archive" % (browser_params['crawl_id'], location))
         (head, tail) = os.path.split(location)
         #Remove old backups if exist
@@ -61,18 +61,18 @@
         shutil.copytree(location, os.path.join(dump_location,tail))
         if clear:
-            logger.debug("CLEARING %s during flash file archive" % location)
+            logger.debug("BROWSER %i: CLEARING %s during flash file archive" % (browser_params['crawl_id'], location))
             rmsubtree(location)
 
-def load_flash_files(logger, tar_location):
+def load_flash_files(logger, browser_params, tar_location):
     """ clear old flash cookies and load ones from dump """
     #Clear previous objects prior to loading
     for location in FLASH_LOCS:
         if not os.path.isdir(location):
-            logger.warning("%s not found when attempting to load flash files, skipping..." % location)
+            logger.warning("BROWSER %i: %s not found when attempting to load flash files, skipping..." % (browser_params['crawl_id'], location))
             continue
-        logger.debug("CLEARING %s before loading flash files" % location)
+        logger.debug("BROWSER %i: CLEARING %s before loading flash files" % (browser_params['crawl_id'], location))
         shutil.rmtree(location)
     #Copy flash storage objects from tar_location
@@ -80,11 +80,12 @@ def load_flash_files(logger, tar_location):
         if os.path.exists(os.path.join(tar_location,tail)):
             shutil.copytree(os.path.join(tar_location,tail),location)
         else:
-            logger.warning("%s not found while loading flash files, skipping..." % os.path.join(tar_location, tail))
+            logger.warning("BROWSER %i: %s not found while loading flash files, skipping..." % (browser_params['crawl_id'], os.path.join(tar_location, tail)))
             continue
 
-def dump_profile(browser_profile_folder, manager_params, tar_location, close_webdriver,
-                 webdriver=None, browser_settings=None, save_flash=False, full_profile=True):
+def dump_profile(browser_profile_folder, manager_params, browser_params, tar_location,
+                 close_webdriver, webdriver=None, browser_settings=None, save_flash=False,
+                 full_profile=True):
     """
     dumps a browser profile currently stored in <browser_profile_folder> to
     <tar_location> in which both folders are absolute paths.
@@ -121,7 +122,7 @@ def dump_profile(browser_profile_folder, manager_params, tar_location, close_web
     # backup and tar profile
     tar = tarfile.open(tar_location + tar_name, 'w:gz')
     if full_profile: # backup all storage vectors
-        logger.debug("Backing up full profile from %s to %s" % (browser_profile_folder, tar_location + tar_name))
+        logger.debug("BROWSER %i: Backing up full profile from %s to %s" % (browser_params['crawl_id'], browser_profile_folder, tar_location + tar_name))
         storage_vector_files = [
             'cookies.sqlite', 'cookies.sqlite-shm', 'cookies.sqlite-wal', # cookies
             'places.sqlite', 'places.sqlite-shm', 'places.sqlite-wal', # history
@@ -134,36 +135,36 @@ def dump_profile(browser_profile_folder, manager_params, tar_location, close_web
         for item in storage_vector_files:
             full_path = os.path.join(browser_profile_folder, item)
             if not os.path.isfile(full_path) and full_path[-3:] != 'shm' and full_path[-3:] != 'wal':
-                logger.critical("%s NOT FOUND IN profile folder, skipping." % full_path)
+                logger.critical("BROWSER %i: %s NOT FOUND IN profile folder, skipping." % (browser_params['crawl_id'], full_path))
             elif not os.path.isfile(full_path) and (full_path[-3:] == 'shm' or full_path[-3:] == 'wal'):
                 continue # These are just checkpoint files
             tar.add(full_path, arcname=item)
         for item in storage_vector_dirs:
             full_path = os.path.join(browser_profile_folder, item)
             if not os.path.isdir(full_path):
-                logger.warning("%s NOT FOUND IN profile folder, skipping." % full_path)
+                logger.warning("BROWSER %i: %s NOT FOUND IN profile folder, skipping." % (browser_params['crawl_id'], full_path))
                 continue
             tar.add(full_path, arcname=item)
     else: # only backup cookies and history
-        logger.debug("Backing up limited profile from %s to %s" % (browser_profile_folder, tar_location + tar_name))
+        logger.debug("BROWSER %i: Backing up limited profile from %s to %s" % (browser_params['crawl_id'], browser_profile_folder, tar_location + tar_name))
         for db in ["cookies.sqlite", "cookies.sqlite-shm", "cookies.sqlite-wal",
                    "places.sqlite", "places.sqlite-shm", "places.sqlite-wal"]:
             if os.path.isfile(browser_profile_folder + db):
-                logger.debug("Adding %s from profile folder to archive." % full_path)
+                logger.debug("BROWSER %i: Adding %s from profile folder to archive." % (browser_params['crawl_id'], full_path))
                 tar.add(browser_profile_folder + db, arcname=db)
     tar.close()
     # save flash cookies
     if save_flash:
-        save_flash_files(logger, tar_location)
+        save_flash_files(logger, browser_params, tar_location)
     # save the browser settings
     if browser_settings is not None:
         save_browser_settings(tar_location, browser_settings)
 
-def load_profile(browser_profile_folder, manager_params, tar_location, load_flash=False):
+def load_profile(browser_profile_folder, manager_params, browser_params, tar_location, load_flash=False):
     """
     loads a zipped cookie-based profile stored in <tar_location> and
     unzips it to <browser_profile_folder>. This will load whatever profile
@@ -184,7 +185,7 @@ def load_profile(browser_profile_folder, manager_params, tar_location, load_flas
         tar_name = 'profile.tar.gz'
         # Copy and untar the loaded profile
-        logger.debug("Copying profile tar from %s to %s" % (tar_location+tar_name, browser_profile_folder))
+        logger.debug("BROWSER %i: Copying profile tar from %s to %s" % (browser_params['crawl_id'], tar_location+tar_name, browser_profile_folder))
         subprocess.call(["cp", tar_location + tar_name, browser_profile_folder])
         opener, mode = tarfile.open, 'r:gz'
         f = opener(browser_profile_folder + tar_name, mode)
@@ -194,13 +195,13 @@ def load_profile(browser_profile_folder, manager_params, tar_location, load_flas
         # clear and load flash cookies
         if load_flash:
-            load_flash_files(logger, tar_location)
+            load_flash_files(logger, browser_params, tar_location)
         # load the browser settings
         browser_settings = load_browser_settings(tar_location)
     except Exception as ex:
-        logger.error("Error: %s while attempting to load profile" % str(ex))
+        logger.error("BROWSER %i: Error: %s while attempting to load profile" % (browser_params['crawl_id'],str(ex)))
         browser_settings = None
     return browser_settings
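
The hunks above move browser_params into the signatures of dump_profile and load_profile (third positional argument, before tar_location) and forward it to the flash helpers so their log lines carry the browser ID as well. A hedged sketch of what call sites look like after the change; the import, directories, and parameter dicts are illustrative placeholders, not values from the repository:

# Assumes profile_commands is importable; the real package layout may differ.
import profile_commands

manager_params = {'data_directory': '/tmp/crawl/'}
browser_params = {'crawl_id': 3, 'disable_flash': True,
                  'profile_path': '/tmp/firefox_profile_3/'}

# browser_params now follows manager_params in both calls.
profile_commands.dump_profile(browser_params['profile_path'], manager_params,
                              browser_params, '/tmp/profile_archive/',
                              close_webdriver=False,
                              save_flash=browser_params['disable_flash'] is False)

browser_settings = profile_commands.load_profile(browser_params['profile_path'],
                                                 manager_params, browser_params,
                                                 '/tmp/profile_archive/')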

View file

@@ -32,12 +32,12 @@ def deploy_firefox(status_queue, browser_params, manager_params, crash_recovery)
     profile_settings = None # Imported browser settings
     if browser_params['profile_tar'] and not crash_recovery:
         logger.debug("BROWSER %i: Loading initial browser profile from: %s" % (browser_params['crawl_id'], browser_params['profile_tar']))
-        profile_settings = load_profile(browser_profile_path, manager_params,
+        profile_settings = load_profile(browser_profile_path, manager_params, browser_params,
                                         browser_params['profile_tar'],
                                         load_flash=browser_params['disable_flash'] is False)
     elif browser_params['profile_tar']:
         logger.debug("BROWSER %i: Loading recovered browser profile from: %s" % (browser_params['crawl_id'], browser_params['profile_tar']))
-        profile_settings = load_profile(browser_profile_path, manager_params,
+        profile_settings = load_profile(browser_profile_path, manager_params, browser_params,
                                         browser_params['profile_tar'])
     if browser_params['random_attributes'] and profile_settings is None:

View file

@@ -85,18 +85,18 @@ def save_javascript_content(logger, browser_params, manager_params, msg):
         try:
             script = zlib.decompress(msg.response.content, zlib.MAX_WBITS|16)
         except zlib.error as e:
-            logger.error('Received zlib error when trying to decompress gzipped javascript: %s' % str(e))
+            logger.error('BROWSER %i: Received zlib error when trying to decompress gzipped javascript: %s' % (browser_params['crawl_id'],str(e)))
             return
     elif 'deflate' in msg.response.headers['Content-Encoding']:
         try:
             script = zlib.decompress(msg.response.content, -zlib.MAX_WBITS)
         except zlib.error as e:
-            logger.error('Received zlib error when trying to decompress deflated javascript: %s' % str(e))
+            logger.error('BROWSER %i: Received zlib error when trying to decompress deflated javascript: %s' % (browser_params['crawl_id'],str(e)))
             return
     elif msg.response.headers['Content-Encoding'] == []:
         script = msg.response.content
     else:
-        logger.error('Received Content-Encoding %s. Not supported by Firefox, skipping archive.' % str(msg.response.headers['Content-Encoding']))
+        logger.error('BROWSER %i: Received Content-Encoding %s. Not supported by Firefox, skipping archive.' % (browser_params['crawl_id'], str(msg.response.headers['Content-Encoding'])))
         return
     path = os.path.join(manager_params['data_directory'],'javascript_files/')