Add command to migrate files to new directory structure (#19239)

* Add command to migrate files to new directory structure

* Remove unused import
This commit is contained in:
Mathieu Pillard 2022-05-17 17:45:13 +02:00 коммит произвёл GitHub
Родитель 1c50e675c4
Коммит 2f25d00211
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 272 добавлений и 1 удалений

Просмотреть файл

@ -0,0 +1,94 @@
import os
from datetime import datetime, timedelta
from django.conf import settings
from django.core.management.base import BaseCommand
from olympia.files.models import File
from olympia.files.utils import id_to_path
class Command(BaseCommand):
    """Management command migrating add-on files to the new directory layout.

    Every entry directly under ``settings.ADDONS_PATH`` whose name is made
    only of digits is treated as an add-on directory. Each regular file found
    in it that matches a ``File`` row is hard-linked to the path computed by
    the ``file`` field's ``upload_to`` and the row is updated to point to it.
    The old files are left in place.
    """

    help = 'Migrate add-on files on the filesystem to the new directory structure'

    def handle(self, *args, **options):
        """Entry point called by Django: run the whole migration."""
        self.migrate()

    def print_eta(self, *, elapsed, processed_count, remaining_count):
        """Write an ETA line to stdout.

        :param elapsed: ``timedelta`` spent processing so far.
        :param processed_count: number of entries processed in ``elapsed``.
        :param remaining_count: number of entries left to process.

        The ETA is shown as ``Unknown`` when nothing has been processed yet,
        since no per-entry duration can be derived.
        """
        # total_seconds() keeps microseconds - it should often be 0.xxxxxx in
        # our case. We use the full precision for computing the ETA, but round
        # to the second when displaying.
        total_seconds = elapsed.total_seconds()
        eta = (
            timedelta(seconds=int(total_seconds / processed_count * remaining_count))
            if processed_count
            else 'Unknown'
        )
        self.stdout.write(f'ETA {eta} ; Remaining entries {remaining_count}\n')

    def migrate(self):
        """Walk ``settings.ADDONS_PATH`` and migrate every add-on directory.

        Prints an ETA after the first entry and then every 1000 entries, and
        a final summary with the number of entries processed, the number of
        files migrated and the total time spent.
        """
        # Number of entries to migrate is number of links to the directory
        # minus `.`, the parent dir and `temp/` which we're not touching.
        entries_total = os.stat(settings.ADDONS_PATH).st_nlink - 3
        processed_count = 0
        migrated_count = 0
        entries = os.scandir(settings.ADDONS_PATH)
        start_time = datetime.now()
        for entry in entries:
            if not entry.name.isdigit():
                # NOTE(review): `temp/` looks like it is already excluded by
                # the `- 3` above, so it may be subtracted twice here. That
                # only skews the ETA by one entry - confirm before changing.
                entries_total -= 1
                self.stderr.write(f'Ignoring non-addon entry {entry.name}')
                continue
            migrated_count += self.migrate_directory_contents(entry.name)
            # Since we use the add-ons and not the files to compute the ETA
            # it's never going to be 100% accurate, but it should be good
            # enough.
            processed_count += 1
            if processed_count == 1 or processed_count % 1000 == 0:
                self.print_eta(
                    elapsed=datetime.now() - start_time,
                    processed_count=processed_count,
                    remaining_count=entries_total - processed_count,
                )
        # Measure the total time here rather than reusing the value from the
        # last ETA checkpoint: that one is stale by up to 999 entries and does
        # not exist at all when no add-on directory was found.
        elapsed = datetime.now() - start_time
        self.stdout.write(
            f'Processed {processed_count} entries (migrated {migrated_count}) '
            f'in {elapsed.total_seconds()} seconds.'
        )

    def migrate_directory_contents(self, dirname) -> int:
        """Migrate every regular file directly inside one add-on directory.

        :param dirname: name of the directory under ``settings.ADDONS_PATH``
            (the add-on pk, as a string).
        :return: number of files for which ``migrate_file()`` returned a
            truthy value.
        """
        old_dirpath = os.path.join(settings.ADDONS_PATH, dirname)
        # Create the destination directory upfront so that os.link() has
        # somewhere to put the files. Presumably id_to_path() yields the same
        # directory as the `file` field's upload_to - verify against
        # olympia.files.utils.
        new_dirpath = os.path.join(
            settings.ADDONS_PATH, id_to_path(dirname, breadth=2)
        )
        os.makedirs(new_dirpath, exist_ok=True)
        migrated_count_in_dir = 0
        # Use the iterator as a context manager so the underlying directory
        # handle is released deterministically even if migrate_file() raises.
        with os.scandir(old_dirpath) as entries:
            for entry in entries:
                if entry.is_file() and self.migrate_file(dirname, entry.name):
                    migrated_count_in_dir += 1
        return migrated_count_in_dir

    def migrate_file(self, addon_pk, filename) -> bool:
        """Hard-link a single file to its new location and update the database.

        :param addon_pk: pk of the add-on, as a string (also the name of the
            old directory holding the file).
        :param filename: name of the file inside that directory.
        :return: ``False`` if no ``File`` row matches (nothing is done),
            ``True`` once the row points to the new path.
        """
        filename_with_dirname = os.path.join(addon_pk, filename)
        old_path = os.path.join(settings.ADDONS_PATH, filename_with_dirname)
        try:
            instance = File.objects.select_related('version', 'version__addon').get(
                file=filename, version__addon=addon_pk
            )
        except File.DoesNotExist:
            self.stderr.write(
                f'Ignoring likely obsolete or already migrated {filename_with_dirname}'
            )
            return False
        new_filename_with_dirnames = instance._meta.get_field('file').upload_to(
            instance, filename
        )
        new_path = os.path.join(settings.ADDONS_PATH, new_filename_with_dirnames)
        try:
            os.link(old_path, new_path)
        except FileExistsError:
            # If we're here, it means the file has likely already been migrated
            # on the filesystem but the database hasn't been updated yet (maybe
            # we stopped the script and re-triggered it).
            self.stderr.write(f'Ignoring already migrated {filename_with_dirname}')
        # Reached in both cases above: the database must end up pointing to
        # the new path whether or not the link had to be created just now.
        instance.update(file=new_filename_with_dirnames)
        return True

Просмотреть файл

@ -5,12 +5,189 @@ from unittest import mock
from django.conf import settings
from olympia.amo.tests import TestCase
from olympia.amo.tests import addon_factory, TestCase
from olympia.files.management.commands.migrate_files_to_new_structure import (
Command as MigrateFilesToNewStructure,
)
from olympia.files.management.commands.migrate_guarded_addons import (
Command as MigrateGuardedAddons,
)
class TestMigrateFilesToNewStructure(TestCase):
    """Tests for the ``migrate_files_to_new_structure`` management command."""

    # Patch `os` functions through the module under test rather than through
    # an unrelated command module that merely happens to import `os` too.
    COMMAND_MODULE = 'olympia.files.management.commands.migrate_files_to_new_structure'

    def setUp(self):
        # Use plain in-memory streams so the tests can inspect exactly what
        # the command wrote.
        self.command = MigrateFilesToNewStructure()
        self.command.verbosity = 1
        self.command.stderr = io.StringIO()
        self.command.stdout = io.StringIO()

    def test_print_eta(self):
        """A tiny elapsed time with a huge backlog gives an ETA in days."""
        self.command.print_eta(
            elapsed=timedelta(seconds=0, microseconds=191332),
            processed_count=4,
            remaining_count=815162342,
        )
        assert (
            self.command.stdout.getvalue()
            == 'ETA 451 days, 7:01:00 ; Remaining entries 815162342\n'
        )

    def test_print_eta_shorter(self):
        """The ETA is rounded down to the second when displayed."""
        self.command.print_eta(
            elapsed=timedelta(minutes=20, seconds=30, microseconds=424567),
            processed_count=12345,
            remaining_count=678,
        )
        assert self.command.stdout.getvalue() == 'ETA 0:01:07 ; Remaining entries 678\n'

    def test_print_eta_0_migrated(self):
        """With nothing processed yet the ETA can't be computed."""
        self.command.print_eta(
            elapsed=timedelta(seconds=1),
            processed_count=0,
            remaining_count=9999,
        )
        assert (
            self.command.stdout.getvalue() == 'ETA Unknown ; Remaining entries 9999\n'
        )

    def test_print_eta_0_remaining(self):
        """With nothing left to process the ETA is zero."""
        self.command.print_eta(
            elapsed=timedelta(hours=8, minutes=7, seconds=6, microseconds=540530),
            processed_count=1000000000,
            remaining_count=0,
        )
        assert self.command.stdout.getvalue() == 'ETA 0:00:00 ; Remaining entries 0\n'

    def test_migrate_file(self):
        """A file known to the database is linked and its row is updated."""
        file_ = addon_factory(
            pk=123456,
            slug='someaddon',
            version_kw={'version': '42.0'},
            file_kw={'is_signed': True},
        ).current_version.file
        file_.update(file='bar.xpi')

        with mock.patch(f'{self.COMMAND_MODULE}.os.link') as mocked_link:
            rval = self.command.migrate_file('123456', 'bar.xpi')

        expected_source_path = os.path.join(settings.ADDONS_PATH, '123456', 'bar.xpi')
        expected_target_path = os.path.join(
            settings.ADDONS_PATH, '56/3456/123456', 'someaddon-42.0.xpi'
        )
        assert mocked_link.call_count == 1
        assert mocked_link.call_args == ((expected_source_path, expected_target_path),)
        file_.reload()
        assert file_.file.path == expected_target_path
        assert rval is True
        assert self.command.stderr.getvalue() == ''

    def test_migrate_file_not_in_db(self):
        """A file without a matching database row is reported and skipped."""
        with mock.patch(f'{self.COMMAND_MODULE}.os.link') as mocked_link:
            rval = self.command.migrate_file('123456', 'bar.xpi')

        assert mocked_link.call_count == 0
        assert rval is False
        assert (
            self.command.stderr.getvalue()
            == 'Ignoring likely obsolete or already migrated 123456/bar.xpi'
        )

    def test_migrate_file_already_exists(self):
        """An already existing target is ignored but the row is still updated."""
        file_ = addon_factory(
            pk=123456,
            slug='someaddon',
            version_kw={'version': '42.0'},
            file_kw={'is_signed': True},
        ).current_version.file
        file_.update(file='bar.xpi')

        with mock.patch(f'{self.COMMAND_MODULE}.os.link') as mocked_link:
            mocked_link.side_effect = FileExistsError
            self.command.migrate_file('123456', 'bar.xpi')

        expected_source_path = os.path.join(settings.ADDONS_PATH, '123456', 'bar.xpi')
        expected_target_path = os.path.join(
            settings.ADDONS_PATH, '56/3456/123456', 'someaddon-42.0.xpi'
        )
        assert mocked_link.call_count == 1
        assert mocked_link.call_args == ((expected_source_path, expected_target_path),)
        # We ignored the error and proceeded as normal (file had already been
        # migrated on the filesystem but database had not been updated yet, now
        # it should be).
        file_.reload()
        assert file_.file.path == expected_target_path
        assert (
            self.command.stderr.getvalue() == 'Ignoring already migrated 123456/bar.xpi'
        )

    def test_migrate_directory_contents(self):
        """Each regular file in the add-on directory is passed to migrate_file()."""
        addon_path = os.path.join(settings.ADDONS_PATH, '4815162342')
        os.makedirs(addon_path)
        file_path = os.path.join(addon_path, 'bar.xpi')
        with open(file_path, 'w') as f:
            f.write('a')

        with mock.patch.object(self.command, 'migrate_file') as migrate_file_mock:
            self.command.migrate_directory_contents('4815162342')

        assert migrate_file_mock.call_count == 1
        assert migrate_file_mock.call_args[0] == ('4815162342', 'bar.xpi')

    def test_migrate_directory_contents_empty_dir(self):
        """An empty add-on directory triggers no migrate_file() call."""
        addon_path = os.path.join(settings.ADDONS_PATH, '4815162342')
        os.makedirs(addon_path)

        with mock.patch.object(self.command, 'migrate_file') as migrate_file_mock:
            self.command.migrate_directory_contents('4815162342')

        assert migrate_file_mock.call_count == 0

    @mock.patch(
        'olympia.files.management.commands.migrate_files_to_new_structure.os.scandir'
    )
    def test_migrate(self, scandir_mock):
        """Only digit-named entries are migrated; ETA and summary are printed."""

        def make_entry(name):
            entry = mock.Mock(
                spec=os.DirEntry, path=os.path.join(settings.ADDONS_PATH, name)
            )
            # `name` is a special argument of the Mock constructor, so the
            # attribute has to be set after the mock is created.
            entry.name = name
            return entry

        # Return 2001 addon directories and a couple additional entries that
        # should be ignored.
        scandir_mock.return_value = [make_entry(str(x)) for x in range(1, 2002)] + [
            make_entry('.123456'),
            make_entry('temp'),
        ]

        with mock.patch.object(
            self.command, 'migrate_directory_contents'
        ) as migrate_directory_contents_mock:
            migrate_directory_contents_mock.return_value = 1
            self.command.migrate()

        # We're mocking migrate_directory_contents() so scandir() should have
        # been called only once.
        assert scandir_mock.call_count == 1
        assert scandir_mock.call_args == ((settings.ADDONS_PATH,),)
        assert migrate_directory_contents_mock.call_count == 2001
        assert migrate_directory_contents_mock.call_args_list[23] == (('24',),)
        # We should have printed the ETA 3 times and added a summary in the 4th
        # line.
        assert len(self.command.stdout.getvalue().strip().split('\n')) == 4
class TestMigrateGuardedAddons(TestCase):
def setUp(self):
self.command = MigrateGuardedAddons()