зеркало из https://github.com/openwpm/OpenWPM.git
Improve profile dumping logic
Move the core implementation of profile dumping into a `dump_profile` function, which can be used both internally when closing or restarting a crashed browser and from the `execute()` method of `DumpProfileCommand`. Also, make compression the default in `DumpProfileCommand`. Finally, do not compress the tar archive of the crashed browser's profile when restarting from a crash: it is a short-lived tar file, so we should avoid the extra compression/decompression step.
This commit is contained in:
Родитель
3f7efc2490
Коммит
1e16513370
|
@ -18,7 +18,7 @@ from multiprocess import Queue
|
|||
from selenium.common.exceptions import WebDriverException
|
||||
from tblib import pickling_support
|
||||
|
||||
from .commands.profile_commands import DumpProfileCommand
|
||||
from .commands.profile_commands import dump_profile
|
||||
from .commands.types import BaseCommand, ShutdownSignal
|
||||
from .config import BrowserParamsInternal, ManagerParamsInternal
|
||||
from .deploy_browsers import deploy_firefox
|
||||
|
@ -105,17 +105,13 @@ class Browser:
|
|||
if self.current_profile_path is not None:
|
||||
# tar contents of crashed profile to a temp dir
|
||||
tempdir = tempfile.mkdtemp(prefix="openwpm_profile_archive_")
|
||||
tar_path = Path(tempdir) / "profile.tar.gz"
|
||||
tar_path = Path(tempdir) / "profile.tar"
|
||||
|
||||
self.browser_params.profile_path = self.current_profile_path
|
||||
dump_profile_command = DumpProfileCommand(
|
||||
tar_path=tar_path, close_webdriver=False, compress=True
|
||||
)
|
||||
dump_profile_command.execute(
|
||||
webdriver=None,
|
||||
dump_profile(
|
||||
browser_profile_path=self.current_profile_path,
|
||||
tar_path=tar_path,
|
||||
compress=False,
|
||||
browser_params=self.browser_params,
|
||||
manager_params=self.manager_params,
|
||||
extension_socket=None,
|
||||
)
|
||||
|
||||
# make sure browser loads crashed profile
|
||||
|
@ -412,17 +408,12 @@ class Browser:
|
|||
% (self.browser_id, self.browser_params.profile_archive_dir)
|
||||
)
|
||||
tar_path = self.browser_params.profile_archive_dir / "profile.tar.gz"
|
||||
self.browser_params.profile_path = self.current_profile_path
|
||||
dump_profile_command = DumpProfileCommand(
|
||||
assert self.current_profile_path is not None
|
||||
dump_profile(
|
||||
browser_profile_path=self.current_profile_path,
|
||||
tar_path=tar_path,
|
||||
close_webdriver=False,
|
||||
compress=True,
|
||||
)
|
||||
dump_profile_command.execute(
|
||||
webdriver=None,
|
||||
browser_params=self.browser_params,
|
||||
manager_params=self.manager_params,
|
||||
extension_socket=None,
|
||||
)
|
||||
|
||||
# Clean up temporary files
|
||||
|
|
|
@ -2,7 +2,6 @@ import logging
|
|||
import shutil
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from selenium.webdriver import Firefox
|
||||
|
||||
|
@ -16,13 +15,85 @@ from .utils.firefox_profile import sleep_until_sqlite_checkpoint
|
|||
logger = logging.getLogger("openwpm")
|
||||
|
||||
|
||||
def dump_profile(
    browser_profile_path: Path,
    tar_path: Path,
    compress: bool,
    browser_params: BrowserParamsInternal,
) -> None:
    """Dump the storage-related parts of a browser profile to a tar file.

    Only a fixed set of storage files and directories is archived, not the
    whole profile folder. Entries are stored under their names relative to
    the profile folder.

    Args:
        browser_profile_path: Profile folder to read from.
        tar_path: Destination archive. Missing parent folders are created and
            an existing file at this path is deleted first.
        compress: Write a gzip-compressed archive if true, a plain tar
            otherwise.
        browser_params: Only ``browser_id`` is read, for log messages; it
            must not be None.
    """
    assert browser_params.browser_id is not None

    # Creating the folders if need be
    tar_path.parent.mkdir(exist_ok=True, parents=True)

    # see if this file exists first
    # if it does, delete it before we try to save the current session
    if tar_path.exists():
        tar_path.unlink()

    storage_vector_files = [
        "cookies.sqlite",  # cookies
        "cookies.sqlite-shm",
        "cookies.sqlite-wal",
        "places.sqlite",  # history
        "places.sqlite-shm",
        "places.sqlite-wal",
        "webappsstore.sqlite",  # localStorage
        "webappsstore.sqlite-shm",
        "webappsstore.sqlite-wal",
    ]
    storage_vector_dirs = [
        "webapps",  # related to localStorage?
        "storage",  # directory for IndexedDB
    ]

    logger.debug(
        "BROWSER %i: Backing up full profile from %s to %s"
        % (browser_params.browser_id, browser_profile_path, tar_path)
    )

    # backup and tar profile
    # The context manager closes the archive file even if adding a member
    # fails part-way through.
    mode = "w:gz" if compress else "w"
    with tarfile.open(tar_path, mode, errorlevel=1) as tar:
        for item in storage_vector_files:
            full_path = browser_profile_path / item
            if not full_path.is_file():
                # -shm / -wal files are just checkpoint files, so their
                # absence is not worth reporting.
                if not full_path.name.endswith(("shm", "wal")):
                    logger.critical(
                        "BROWSER %i: %s NOT FOUND IN profile folder, skipping."
                        % (browser_params.browser_id, full_path)
                    )
                # Actually skip the missing file: handing a nonexistent path
                # to tar.add() would raise instead of skipping as logged.
                continue
            tar.add(full_path, arcname=item)

        for item in storage_vector_dirs:
            full_path = browser_profile_path / item
            if not full_path.is_dir():
                logger.warning(
                    "BROWSER %i: %s NOT FOUND IN profile folder, skipping."
                    % (browser_params.browser_id, full_path)
                )
                continue
            tar.add(full_path, arcname=item)
|
||||
|
||||
|
||||
class DumpProfileCommand(BaseCommand):
|
||||
"""
|
||||
Dumps a browser profile currently stored in <browser_params.profile_path> to
|
||||
<tar_path>.
|
||||
"""
|
||||
|
||||
def __init__(self, tar_path: Path, close_webdriver: bool, compress: bool) -> None:
|
||||
def __init__(
|
||||
self, tar_path: Path, close_webdriver: bool, compress: bool = True
|
||||
) -> None:
|
||||
self.tar_path = tar_path
|
||||
self.close_webdriver = close_webdriver
|
||||
self.compress = compress
|
||||
|
@ -37,78 +108,20 @@ class DumpProfileCommand(BaseCommand):
|
|||
webdriver: Firefox,
|
||||
browser_params: BrowserParamsInternal,
|
||||
manager_params: ManagerParamsInternal,
|
||||
extension_socket: Optional[ClientSocket],
|
||||
extension_socket: ClientSocket,
|
||||
) -> None:
|
||||
browser_profile_path = browser_params.profile_path
|
||||
assert browser_profile_path is not None
|
||||
assert browser_params.browser_id is not None
|
||||
|
||||
# Creating the folders if need be
|
||||
self.tar_path.parent.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# see if this file exists first
|
||||
# if it does, delete it before we try to save the current session
|
||||
if self.tar_path.exists():
|
||||
self.tar_path.unlink() # IDK why it's called like this
|
||||
# if this is a dump on close, close the webdriver and wait for checkpoint
|
||||
if self.close_webdriver:
|
||||
webdriver.close()
|
||||
sleep_until_sqlite_checkpoint(browser_profile_path)
|
||||
sleep_until_sqlite_checkpoint(browser_params.profile_path)
|
||||
|
||||
# backup and tar profile
|
||||
if self.compress:
|
||||
tar = tarfile.open(self.tar_path, "w:gz", errorlevel=1)
|
||||
else:
|
||||
tar = tarfile.open(self.tar_path, "w", errorlevel=1)
|
||||
logger.debug(
|
||||
"BROWSER %i: Backing up full profile from %s to %s"
|
||||
% (
|
||||
browser_params.browser_id,
|
||||
browser_profile_path,
|
||||
self.tar_path,
|
||||
)
|
||||
assert browser_params.profile_path is not None
|
||||
dump_profile(
|
||||
browser_params.profile_path,
|
||||
self.tar_path,
|
||||
self.compress,
|
||||
browser_params,
|
||||
)
|
||||
storage_vector_files = [
|
||||
"cookies.sqlite", # cookies
|
||||
"cookies.sqlite-shm",
|
||||
"cookies.sqlite-wal",
|
||||
"places.sqlite", # history
|
||||
"places.sqlite-shm",
|
||||
"places.sqlite-wal",
|
||||
"webappsstore.sqlite", # localStorage
|
||||
"webappsstore.sqlite-shm",
|
||||
"webappsstore.sqlite-wal",
|
||||
]
|
||||
storage_vector_dirs = [
|
||||
"webapps", # related to localStorage?
|
||||
"storage", # directory for IndexedDB
|
||||
]
|
||||
for item in storage_vector_files:
|
||||
full_path = browser_profile_path / item
|
||||
if (
|
||||
not full_path.is_file()
|
||||
and not full_path.name.endswith("shm")
|
||||
and not full_path.name.endswith("wal")
|
||||
):
|
||||
logger.critical(
|
||||
"BROWSER %i: %s NOT FOUND IN profile folder, skipping."
|
||||
% (browser_params.browser_id, full_path)
|
||||
)
|
||||
elif not full_path.is_file() and (
|
||||
full_path.name.endswith("shm") or full_path.name.endswith("wal")
|
||||
):
|
||||
continue # These are just checkpoint files
|
||||
tar.add(full_path, arcname=item)
|
||||
for item in storage_vector_dirs:
|
||||
full_path = browser_profile_path / item
|
||||
if not full_path.is_dir():
|
||||
logger.warning(
|
||||
"BROWSER %i: %s NOT FOUND IN profile folder, skipping."
|
||||
% (browser_params.browser_id, full_path)
|
||||
)
|
||||
continue
|
||||
tar.add(full_path, arcname=item)
|
||||
tar.close()
|
||||
|
||||
|
||||
def load_profile(
|
||||
|
|
Загрузка…
Ссылка в новой задаче