Add command to migrate files to new directory structure (#19239)
* Add command to migrate files to new directory structure * Remove unused import
This commit is contained in:
Родитель
1c50e675c4
Коммит
2f25d00211
|
@ -0,0 +1,94 @@
|
|||
import os
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from olympia.files.models import File
|
||||
from olympia.files.utils import id_to_path
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Migrate add-on files on disk to the new directory structure.

    Walks every numeric entry found directly under ``settings.ADDONS_PATH``
    (each one is treated as an add-on directory named after its pk),
    hard-links each file known to the database to the path computed by the
    ``file`` field's ``upload_to`` callable, and records that new path on the
    ``File`` instance. Progress/ETA lines go to stdout, ignored entries are
    reported on stderr.
    """

    def handle(self, *args, **options):
        """Entry point called by Django: run the migration."""
        self.migrate()

    def print_eta(self, *, elapsed, processed_count, remaining_count):
        """Write an estimated time of completion to stdout.

        :param elapsed: ``timedelta`` spent so far.
        :param processed_count: number of entries handled during ``elapsed``.
        :param remaining_count: number of entries still to handle.

        The ETA is ``'Unknown'`` when nothing has been processed yet, since
        no per-entry duration can be derived in that case.
        """
        # total_seconds() keeps microseconds - it should often be 0.xxxxxx in
        # our case. We use the full precision for computing the ETA, but round
        # to the second when displaying.
        total_seconds = elapsed.total_seconds()
        eta = (
            timedelta(seconds=int(total_seconds / processed_count * remaining_count))
            if processed_count
            else 'Unknown'
        )
        self.stdout.write(f'ETA {eta} ; Remaining entries {remaining_count}\n')

    def migrate(self):
        """Migrate every add-on directory found under ``settings.ADDONS_PATH``.

        Entries whose name is not purely numeric are skipped (and reported on
        stderr). An ETA is printed after the first processed entry and then
        every 1000 entries, followed by a final summary line.
        """
        # Number of entries to migrate is number of links to the directory
        # minus `.`, the parent dir and `temp/` which we're not touching.
        # NOTE(review): every non-numeric entry met below (including `temp`)
        # is subtracted again, so this is only an approximation used for the
        # ETA - confirm whether the double count of `temp` matters.
        entries_total = os.stat(settings.ADDONS_PATH).st_nlink - 3
        processed_count = 0
        migrated_count = 0
        entries = os.scandir(settings.ADDONS_PATH)
        start_time = datetime.now()
        for entry in entries:
            if not entry.name.isdigit():
                entries_total -= 1
                self.stderr.write(f'Ignoring non-addon entry {entry.name}')
                continue
            migrated_count += self.migrate_directory_contents(entry.name)
            # Since we use the add-ons and not the files to compute the ETA
            # it's never going to be 100% accurate, but it should be good
            # enough.
            processed_count += 1
            if processed_count == 1 or processed_count % 1000 == 0:
                self.print_eta(
                    elapsed=datetime.now() - start_time,
                    processed_count=processed_count,
                    # The total is an estimate and can end up lower than what
                    # we actually processed: never report a negative count
                    # (which would also produce a negative ETA).
                    remaining_count=max(entries_total - processed_count, 0),
                )
        # Measure the total duration here rather than re-using the value from
        # the last ETA print: that one is stale for most runs and does not
        # exist at all when no add-on directory was found.
        elapsed = datetime.now() - start_time
        self.stdout.write(
            f'Processed {processed_count} entries (migrated {migrated_count}) '
            f'in {elapsed.total_seconds()} seconds.'
        )

    def migrate_directory_contents(self, dirname):
        """Migrate all regular files directly inside one add-on directory.

        :param dirname: name of the directory under ``settings.ADDONS_PATH``
            (the add-on pk, as a string).
        :return: number of files for which ``migrate_file`` returned a truthy
            value.
        """
        old_dirpath = os.path.join(settings.ADDONS_PATH, dirname)
        new_dirpath = os.path.join(settings.ADDONS_PATH, id_to_path(dirname, breadth=2))
        # Create the destination up front so that linking files into it does
        # not fail because of a missing directory.
        os.makedirs(new_dirpath, exist_ok=True)
        migrated_count_in_dir = 0
        # Use scandir() as a context manager so the underlying directory
        # handle is released even if migrating one of the files raises.
        with os.scandir(old_dirpath) as entries:
            for entry in entries:
                if entry.is_file() and self.migrate_file(dirname, entry.name):
                    migrated_count_in_dir += 1
        return migrated_count_in_dir

    def migrate_file(self, addon_pk, filename):
        """Hard-link a single file to its new path and update the database.

        :param addon_pk: pk of the add-on, which is also the name of the
            directory currently holding the file.
        :param filename: name of the file inside that directory.
        :return: ``True`` if the ``File`` row now points to the new path,
            ``False`` if no ``File`` matched and the file was left alone.
        """
        filename_with_dirname = os.path.join(addon_pk, filename)
        old_path = os.path.join(settings.ADDONS_PATH, filename_with_dirname)
        try:
            instance = File.objects.select_related('version', 'version__addon').get(
                file=filename, version__addon=addon_pk
            )
        except File.DoesNotExist:
            self.stderr.write(
                f'Ignoring likely obsolete or already migrated {filename_with_dirname}'
            )
            return False
        # Ask the field itself where a file with that name should live now.
        new_filename_with_dirnames = instance._meta.get_field('file').upload_to(
            instance, filename
        )
        new_path = os.path.join(settings.ADDONS_PATH, new_filename_with_dirnames)
        try:
            # Hard link instead of moving: the old path keeps working.
            os.link(old_path, new_path)
        except FileExistsError:
            # If we're here, it means the file has likely already been migrated
            # on the filesystem but the database hasn't been updated yet (maybe
            # we stopped the script and re-triggered it).
            self.stderr.write(f'Ignoring already migrated {filename_with_dirname}')
        instance.update(file=new_filename_with_dirnames)
        return True
|
|
@ -5,12 +5,189 @@ from unittest import mock
|
|||
|
||||
from django.conf import settings
|
||||
|
||||
from olympia.amo.tests import TestCase
|
||||
from olympia.amo.tests import addon_factory, TestCase
|
||||
from olympia.files.management.commands.migrate_files_to_new_structure import (
|
||||
Command as MigrateFilesToNewStructure,
|
||||
)
|
||||
from olympia.files.management.commands.migrate_guarded_addons import (
|
||||
Command as MigrateGuardedAddons,
|
||||
)
|
||||
|
||||
|
||||
class TestMigrateFilesToNewStructure(TestCase):
    """Tests for the ``migrate_files_to_new_structure`` management command."""

    # Dotted path of the module under test, used to build mock.patch targets
    # so that they all point at the command actually being exercised.
    COMMAND_MODULE = 'olympia.files.management.commands.migrate_files_to_new_structure'

    def setUp(self):
        self.command = MigrateFilesToNewStructure()
        self.command.verbosity = 1
        # Capture the output in memory so that tests can make assertions on it.
        self.command.stderr = io.StringIO()
        self.command.stdout = io.StringIO()

    def _create_file_with_old_path(self):
        """Return a File for add-on 123456 whose `file` is still 'bar.xpi'."""
        file_ = addon_factory(
            pk=123456,
            slug='someaddon',
            version_kw={'version': '42.0'},
            file_kw={'is_signed': True},
        ).current_version.file
        file_.update(file='bar.xpi')
        return file_

    def test_print_eta(self):
        self.command.print_eta(
            elapsed=timedelta(seconds=0, microseconds=191332),
            processed_count=4,
            remaining_count=815162342,
        )
        assert (
            self.command.stdout.getvalue()
            == 'ETA 451 days, 7:01:00 ; Remaining entries 815162342\n'
        )

    def test_print_eta_shorter(self):
        self.command.print_eta(
            elapsed=timedelta(minutes=20, seconds=30, microseconds=424567),
            processed_count=12345,
            remaining_count=678,
        )
        assert self.command.stdout.getvalue() == 'ETA 0:01:07 ; Remaining entries 678\n'

    def test_print_eta_0_migrated(self):
        self.command.print_eta(
            elapsed=timedelta(seconds=1),
            processed_count=0,
            remaining_count=9999,
        )
        assert (
            self.command.stdout.getvalue() == 'ETA Unknown ; Remaining entries 9999\n'
        )

    def test_print_eta_0_remaining(self):
        self.command.print_eta(
            elapsed=timedelta(hours=8, minutes=7, seconds=6, microseconds=540530),
            processed_count=1000000000,
            remaining_count=0,
        )
        assert self.command.stdout.getvalue() == 'ETA 0:00:00 ; Remaining entries 0\n'

    def test_migrate_file(self):
        file_ = self._create_file_with_old_path()
        with mock.patch(f'{self.COMMAND_MODULE}.os.link') as mocked_link:
            rval = self.command.migrate_file('123456', 'bar.xpi')
        expected_source_path = os.path.join(settings.ADDONS_PATH, '123456', 'bar.xpi')
        expected_target_path = os.path.join(
            settings.ADDONS_PATH, '56/3456/123456', 'someaddon-42.0.xpi'
        )
        assert mocked_link.call_count == 1
        assert mocked_link.call_args == ((expected_source_path, expected_target_path),)
        file_.reload()
        assert file_.file.path == expected_target_path
        assert rval is True
        assert self.command.stderr.getvalue() == ''

    def test_migrate_file_not_in_db(self):
        with mock.patch(f'{self.COMMAND_MODULE}.os.link') as mocked_link:
            rval = self.command.migrate_file('123456', 'bar.xpi')
        assert mocked_link.call_count == 0
        assert rval is False
        assert (
            self.command.stderr.getvalue()
            == 'Ignoring likely obsolete or already migrated 123456/bar.xpi'
        )

    def test_migrate_file_already_exists(self):
        file_ = self._create_file_with_old_path()
        with mock.patch(f'{self.COMMAND_MODULE}.os.link') as mocked_link:
            mocked_link.side_effect = FileExistsError
            rval = self.command.migrate_file('123456', 'bar.xpi')
        expected_source_path = os.path.join(settings.ADDONS_PATH, '123456', 'bar.xpi')
        expected_target_path = os.path.join(
            settings.ADDONS_PATH, '56/3456/123456', 'someaddon-42.0.xpi'
        )
        assert mocked_link.call_count == 1
        assert mocked_link.call_args == ((expected_source_path, expected_target_path),)
        # We ignored the error and proceeded as normal (file had already been
        # migrated on the filesystem but database had not been updated yet, now
        # it should be).
        file_.reload()
        assert file_.file.path == expected_target_path
        assert rval is True
        assert self.command.stderr.getvalue() == 'Ignoring already migrated 123456/bar.xpi'

    def test_migrate_directory_contents(self):
        addon_path = os.path.join(settings.ADDONS_PATH, '4815162342')
        os.makedirs(addon_path)
        file_path = os.path.join(addon_path, 'bar.xpi')
        with open(file_path, 'w') as f:
            f.write('a')
        with mock.patch.object(self.command, 'migrate_file') as migrate_file_mock:
            self.command.migrate_directory_contents('4815162342')
        assert migrate_file_mock.call_count == 1
        assert migrate_file_mock.call_args[0] == ('4815162342', 'bar.xpi')

    def test_migrate_directory_contents_empty_dir(self):
        addon_path = os.path.join(settings.ADDONS_PATH, '4815162342')
        os.makedirs(addon_path)
        with mock.patch.object(self.command, 'migrate_file') as migrate_file_mock:
            self.command.migrate_directory_contents('4815162342')
        assert migrate_file_mock.call_count == 0

    @mock.patch(
        'olympia.files.management.commands.migrate_files_to_new_structure.os.scandir'
    )
    def test_migrate(self, scandir_mock):
        def fake_dir_entry(name):
            entry = mock.Mock(
                spec=os.DirEntry,
                path=os.path.join(settings.ADDONS_PATH, name),
            )
            # `name` is a special argument of the Mock constructor (it names
            # the mock itself), so the attribute has to be set afterwards.
            entry.name = name
            return entry

        # Return 2001 addon directories and a couple additional entries that
        # should be ignored.
        scandir_mock.return_value = [
            fake_dir_entry(str(x)) for x in range(1, 2002)
        ] + [
            fake_dir_entry('.123456'),
            fake_dir_entry('temp'),
        ]
        with mock.patch.object(
            self.command, 'migrate_directory_contents'
        ) as migrate_directory_contents_mock:
            migrate_directory_contents_mock.return_value = 1
            self.command.migrate()
        # We're mocking migrate_directory_contents() so scandir() should have
        # been called only once.
        assert scandir_mock.call_count == 1
        assert scandir_mock.call_args == ((settings.ADDONS_PATH,),)
        assert migrate_directory_contents_mock.call_count == 2001
        assert migrate_directory_contents_mock.call_args_list[23] == (('24',),)
        # We should have printed the ETA 3 times (after entries 1, 1000 and
        # 2000) and added a summary in the 4th line.
        assert len(self.command.stdout.getvalue().strip().split('\n')) == 4
|
||||
|
||||
|
||||
class TestMigrateGuardedAddons(TestCase):
|
||||
def setUp(self):
|
||||
self.command = MigrateGuardedAddons()
|
||||
|
|
Загрузка…
Ссылка в новой задаче