Move the core implementation of profile dumping into a `dump_profile`
function, which can be used both internally when closing or restarting a
crashed browser and from the `execute()` method of `DumpProfileCommand`.
Also, make compression the default in `DumpProfileCommand`. Finally, do
not compress the tar archive of the crashed browser's profile when
restarting from a crash. We should avoid the extra compression/
decompression step as this is a short-lived tar file.
This commit is contained in:
Georgia Kokkinou 2021-03-16 17:10:50 +02:00
Родитель 3f7efc2490
Коммит 1e16513370
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B6458C50C55732ED
2 изменённых файлов: 90 добавлений и 86 удалений

Просмотреть файл

@ -18,7 +18,7 @@ from multiprocess import Queue
from selenium.common.exceptions import WebDriverException
from tblib import pickling_support
from .commands.profile_commands import DumpProfileCommand
from .commands.profile_commands import dump_profile
from .commands.types import BaseCommand, ShutdownSignal
from .config import BrowserParamsInternal, ManagerParamsInternal
from .deploy_browsers import deploy_firefox
@ -105,17 +105,13 @@ class Browser:
if self.current_profile_path is not None:
# tar contents of crashed profile to a temp dir
tempdir = tempfile.mkdtemp(prefix="openwpm_profile_archive_")
tar_path = Path(tempdir) / "profile.tar.gz"
tar_path = Path(tempdir) / "profile.tar"
self.browser_params.profile_path = self.current_profile_path
dump_profile_command = DumpProfileCommand(
tar_path=tar_path, close_webdriver=False, compress=True
)
dump_profile_command.execute(
webdriver=None,
dump_profile(
browser_profile_path=self.current_profile_path,
tar_path=tar_path,
compress=False,
browser_params=self.browser_params,
manager_params=self.manager_params,
extension_socket=None,
)
# make sure browser loads crashed profile
@ -412,17 +408,12 @@ class Browser:
% (self.browser_id, self.browser_params.profile_archive_dir)
)
tar_path = self.browser_params.profile_archive_dir / "profile.tar.gz"
self.browser_params.profile_path = self.current_profile_path
dump_profile_command = DumpProfileCommand(
assert self.current_profile_path is not None
dump_profile(
browser_profile_path=self.current_profile_path,
tar_path=tar_path,
close_webdriver=False,
compress=True,
)
dump_profile_command.execute(
webdriver=None,
browser_params=self.browser_params,
manager_params=self.manager_params,
extension_socket=None,
)
# Clean up temporary files

Просмотреть файл

@ -2,7 +2,6 @@ import logging
import shutil
import tarfile
from pathlib import Path
from typing import Optional
from selenium.webdriver import Firefox
@ -16,13 +15,85 @@ from .utils.firefox_profile import sleep_until_sqlite_checkpoint
logger = logging.getLogger("openwpm")
def dump_profile(
    browser_profile_path: Path,
    tar_path: Path,
    compress: bool,
    browser_params: BrowserParamsInternal,
) -> None:
    """Dump the storage-related parts of a browser profile to a tar file.

    Only a fixed set of storage files (cookies, history, localStorage
    databases and their ``-shm``/``-wal`` companions) and storage
    directories is archived; each entry is stored under its name relative
    to the profile folder.

    Args:
        browser_profile_path: Folder of the profile to archive.
        tar_path: Destination archive. Missing parent folders are created
            and an already existing file at this path is replaced.
        compress: If true the archive is written gzip-compressed
            (``"w:gz"``), otherwise as a plain tar (``"w"``).
        browser_params: Only ``browser_id`` is read, for log messages; it
            must not be ``None``.

    Missing entries never abort the dump: a missing database is logged at
    critical level, a missing directory at warning level, and missing
    ``-shm``/``-wal`` files are skipped silently.
    """
    assert browser_params.browser_id is not None
    browser_id = browser_params.browser_id

    # Creating the folders if need be
    tar_path.parent.mkdir(exist_ok=True, parents=True)

    # A leftover archive from an earlier dump must not survive: delete it
    # before we try to save the current session.
    if tar_path.exists():
        tar_path.unlink()

    storage_vector_files = [
        "cookies.sqlite",  # cookies
        "cookies.sqlite-shm",
        "cookies.sqlite-wal",
        "places.sqlite",  # history
        "places.sqlite-shm",
        "places.sqlite-wal",
        "webappsstore.sqlite",  # localStorage
        "webappsstore.sqlite-shm",
        "webappsstore.sqlite-wal",
    ]
    storage_vector_dirs = [
        "webapps",  # related to localStorage?
        "storage",  # directory for IndexedDB
    ]

    # backup and tar profile; the context manager guarantees the archive is
    # closed even if adding one of the members raises.
    mode = "w:gz" if compress else "w"
    with tarfile.open(tar_path, mode, errorlevel=1) as tar:
        logger.debug(
            "BROWSER %i: Backing up full profile from %s to %s",
            browser_id,
            browser_profile_path,
            tar_path,
        )

        for item in storage_vector_files:
            full_path = browser_profile_path / item
            if not full_path.is_file():
                if not item.endswith(("shm", "wal")):
                    logger.critical(
                        "BROWSER %i: %s NOT FOUND IN profile folder, skipping.",
                        browser_id,
                        full_path,
                    )
                # Either a checkpoint file that simply does not exist right
                # now, or a missing database that was just reported: in both
                # cases there is nothing to add, so actually skip it instead
                # of letting tar.add() fail on the missing path.
                continue
            tar.add(full_path, arcname=item)

        for item in storage_vector_dirs:
            full_path = browser_profile_path / item
            if not full_path.is_dir():
                logger.warning(
                    "BROWSER %i: %s NOT FOUND IN profile folder, skipping.",
                    browser_id,
                    full_path,
                )
                continue
            tar.add(full_path, arcname=item)
class DumpProfileCommand(BaseCommand):
"""
Dumps a browser profile currently stored in <browser_params.profile_path> to
<tar_path>.
"""
def __init__(self, tar_path: Path, close_webdriver: bool, compress: bool) -> None:
def __init__(
self, tar_path: Path, close_webdriver: bool, compress: bool = True
) -> None:
self.tar_path = tar_path
self.close_webdriver = close_webdriver
self.compress = compress
@ -37,78 +108,20 @@ class DumpProfileCommand(BaseCommand):
webdriver: Firefox,
browser_params: BrowserParamsInternal,
manager_params: ManagerParamsInternal,
extension_socket: Optional[ClientSocket],
extension_socket: ClientSocket,
) -> None:
browser_profile_path = browser_params.profile_path
assert browser_profile_path is not None
assert browser_params.browser_id is not None
# Creating the folders if need be
self.tar_path.parent.mkdir(exist_ok=True, parents=True)
# see if this file exists first
# if it does, delete it before we try to save the current session
if self.tar_path.exists():
self.tar_path.unlink() # IDK why it's called like this
# if this is a dump on close, close the webdriver and wait for checkpoint
if self.close_webdriver:
webdriver.close()
sleep_until_sqlite_checkpoint(browser_profile_path)
sleep_until_sqlite_checkpoint(browser_params.profile_path)
# backup and tar profile
if self.compress:
tar = tarfile.open(self.tar_path, "w:gz", errorlevel=1)
else:
tar = tarfile.open(self.tar_path, "w", errorlevel=1)
logger.debug(
"BROWSER %i: Backing up full profile from %s to %s"
% (
browser_params.browser_id,
browser_profile_path,
self.tar_path,
)
assert browser_params.profile_path is not None
dump_profile(
browser_params.profile_path,
self.tar_path,
self.compress,
browser_params,
)
storage_vector_files = [
"cookies.sqlite", # cookies
"cookies.sqlite-shm",
"cookies.sqlite-wal",
"places.sqlite", # history
"places.sqlite-shm",
"places.sqlite-wal",
"webappsstore.sqlite", # localStorage
"webappsstore.sqlite-shm",
"webappsstore.sqlite-wal",
]
storage_vector_dirs = [
"webapps", # related to localStorage?
"storage", # directory for IndexedDB
]
for item in storage_vector_files:
full_path = browser_profile_path / item
if (
not full_path.is_file()
and not full_path.name.endswith("shm")
and not full_path.name.endswith("wal")
):
logger.critical(
"BROWSER %i: %s NOT FOUND IN profile folder, skipping."
% (browser_params.browser_id, full_path)
)
elif not full_path.is_file() and (
full_path.name.endswith("shm") or full_path.name.endswith("wal")
):
continue # These are just checkpoint files
tar.add(full_path, arcname=item)
for item in storage_vector_dirs:
full_path = browser_profile_path / item
if not full_path.is_dir():
logger.warning(
"BROWSER %i: %s NOT FOUND IN profile folder, skipping."
% (browser_params.browser_id, full_path)
)
continue
tar.add(full_path, arcname=item)
tar.close()
def load_profile(