diff --git a/src/olympia/files/management/commands/migrate_files_to_new_structure.py b/src/olympia/files/management/commands/migrate_files_to_new_structure.py
new file mode 100644
index 0000000000..52fa20f627
--- /dev/null
+++ b/src/olympia/files/management/commands/migrate_files_to_new_structure.py
@@ -0,0 +1,107 @@
+import os
+from datetime import datetime, timedelta
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+
+from olympia.files.models import File
+from olympia.files.utils import id_to_path
+
+
+class Command(BaseCommand):
+    """Migrate add-on files on the filesystem to the new directory structure."""
+
+    def handle(self, *args, **options):
+        self.migrate()
+
+    def print_eta(self, *, elapsed, processed_count, remaining_count):
+        """Write the ETA and the number of remaining entries to stdout."""
+        # total_seconds() keeps microseconds - it should often be 0.xxxxxx in
+        # our case. We use the full precision for computing the ETA, but round
+        # to the second when displaying.
+        total_seconds = elapsed.total_seconds()
+        eta = (
+            timedelta(seconds=int(total_seconds / processed_count * remaining_count))
+            if processed_count
+            else 'Unknown'
+        )
+        self.stdout.write(f'ETA {eta} ; Remaining entries {remaining_count}\n')
+
+    def migrate(self):
+        """Migrate all add-on directories under ADDONS_PATH, reporting progress."""
+        # Number of entries to migrate is number of links to the directory
+        # minus `.`, the parent dir and `temp/` which we're not touching.
+        entries_total = os.stat(settings.ADDONS_PATH).st_nlink - 3
+        processed_count = 0
+        migrated_count = 0
+        entries = os.scandir(settings.ADDONS_PATH)
+        start_time = datetime.now()
+        for entry in entries:
+            if not entry.name.isdigit():
+                entries_total -= 1
+                self.stderr.write(f'Ignoring non-addon entry {entry.name}')
+                continue
+            result = self.migrate_directory_contents(entry.name)
+            migrated_count += result
+            # Since we use the add-ons and not the files to compute the ETA
+            # it's never going to be 100% accurate, but it should be good
+            # enough.
+            processed_count += 1
+            if processed_count == 1 or processed_count % 1000 == 0:
+                self.print_eta(
+                    elapsed=datetime.now() - start_time,
+                    processed_count=processed_count,
+                    remaining_count=entries_total - processed_count,
+                )
+        # Compute the total duration after the loop: it is then defined even
+        # if no add-on directory was processed, and it includes the entries
+        # handled since the last ETA was printed.
+        elapsed = datetime.now() - start_time
+        self.stdout.write(
+            f'Processed {processed_count} entries (migrated {migrated_count}) '
+            f'in {elapsed.total_seconds()} seconds.'
+        )
+
+    def migrate_directory_contents(self, dirname):
+        """Migrate files in an add-on directory; return the number migrated."""
+        old_dirpath = os.path.join(settings.ADDONS_PATH, dirname)
+        new_dirpath = os.path.join(settings.ADDONS_PATH, id_to_path(dirname, breadth=2))
+        os.makedirs(new_dirpath, exist_ok=True)
+        migrated_count_in_dir = 0
+        # Use scandir() as a context manager so that the directory iterator is
+        # closed even if migrating one of the files raises.
+        with os.scandir(old_dirpath) as entries:
+            for entry in entries:
+                if entry.is_file() and self.migrate_file(dirname, entry.name):
+                    migrated_count_in_dir += 1
+        return migrated_count_in_dir
+
+    def migrate_file(self, addon_pk, filename):
+        """Hard-link a file to its new location and update its File instance.
+
+        Return False when no File matches in the database, True otherwise.
+        """
+        filename_with_dirname = os.path.join(addon_pk, filename)
+        old_path = os.path.join(settings.ADDONS_PATH, filename_with_dirname)
+        try:
+            instance = File.objects.select_related('version', 'version__addon').get(
+                file=filename, version__addon=addon_pk
+            )
+        except File.DoesNotExist:
+            self.stderr.write(
+                f'Ignoring likely obsolete or already migrated {filename_with_dirname}'
+            )
+            return False
+        new_filename_with_dirnames = instance._meta.get_field('file').upload_to(
+            instance, filename
+        )
+        new_path = os.path.join(settings.ADDONS_PATH, new_filename_with_dirnames)
+        try:
+            os.link(old_path, new_path)
+        except FileExistsError:
+            # If we're here, it means the file has likely already been migrated
+            # on the filesystem but the database hasn't been updated yet (maybe
+            # we stopped the script and re-triggered it).
+            self.stderr.write(f'Ignoring already migrated {filename_with_dirname}')
+        instance.update(file=new_filename_with_dirnames)
+        return True
diff --git a/src/olympia/files/tests/test_commands.py b/src/olympia/files/tests/test_commands.py
index 9ac1d144d4..e26528b76c 100644
--- a/src/olympia/files/tests/test_commands.py
+++ b/src/olympia/files/tests/test_commands.py
@@ -5,12 +5,201 @@ from unittest import mock
 
 from django.conf import settings
 
-from olympia.amo.tests import TestCase
+from olympia.amo.tests import addon_factory, TestCase
+from olympia.files.management.commands.migrate_files_to_new_structure import (
+    Command as MigrateFilesToNewStructure,
+)
 from olympia.files.management.commands.migrate_guarded_addons import (
     Command as MigrateGuardedAddons,
 )
 
 
+class TestMigrateFilesToNewStructure(TestCase):
+    def setUp(self):
+        self.command = MigrateFilesToNewStructure()
+        self.command.verbosity = 1
+        self.command.stderr = io.StringIO()
+        self.command.stdout = io.StringIO()
+
+    def test_print_eta(self):
+        self.command.print_eta(
+            elapsed=timedelta(seconds=0, microseconds=191332),
+            processed_count=4,
+            remaining_count=815162342,
+        )
+        self.command.stdout.seek(0)
+        assert (
+            self.command.stdout.read()
+            == 'ETA 451 days, 7:01:00 ; Remaining entries 815162342\n'
+        )
+
+    def test_print_eta_shorter(self):
+        self.command.print_eta(
+            elapsed=timedelta(minutes=20, seconds=30, microseconds=424567),
+            processed_count=12345,
+            remaining_count=678,
+        )
+        self.command.stdout.seek(0)
+        assert self.command.stdout.read() == 'ETA 0:01:07 ; Remaining entries 678\n'
+
+    def test_print_eta_0_migrated(self):
+        self.command.print_eta(
+            elapsed=timedelta(seconds=1),
+            processed_count=0,
+            remaining_count=9999,
+        )
+        self.command.stdout.seek(0)
+        assert self.command.stdout.read() == 'ETA Unknown ; Remaining entries 9999\n'
+
+    def test_print_eta_0_remaining(self):
+        self.command.print_eta(
+            elapsed=timedelta(hours=8, minutes=7, seconds=6, microseconds=540530),
+            processed_count=1000000000,
+            remaining_count=0,
+        )
+        self.command.stdout.seek(0)
+        assert self.command.stdout.read() == 'ETA 0:00:00 ; Remaining entries 0\n'
+
+    def test_migrate_file(self):
+        file_ = addon_factory(
+            pk=123456,
+            slug='someaddon',
+            version_kw={'version': '42.0'},
+            file_kw={'is_signed': True},
+        ).current_version.file
+        file_.update(file='bar.xpi')
+        with mock.patch(
+            'olympia.files.management.commands.migrate_files_to_new_structure.os.link'
+        ) as mocked_link:
+            rval = self.command.migrate_file('123456', 'bar.xpi')
+        expected_source_path = os.path.join(settings.ADDONS_PATH, '123456', 'bar.xpi')
+        expected_target_path = os.path.join(
+            settings.ADDONS_PATH, '56/3456/123456', 'someaddon-42.0.xpi'
+        )
+        assert mocked_link.call_count == 1
+        assert mocked_link.call_args == ((expected_source_path, expected_target_path),)
+        file_.reload()
+        assert file_.file.path == expected_target_path
+        assert rval is True
+        self.command.stderr.seek(0)
+        assert self.command.stderr.read() == ''
+
+    def test_migrate_file_not_in_db(self):
+        with mock.patch(
+            'olympia.files.management.commands.migrate_files_to_new_structure.os.link'
+        ) as mocked_link:
+            rval = self.command.migrate_file('123456', 'bar.xpi')
+        assert mocked_link.call_count == 0
+        assert rval is False
+        self.command.stderr.seek(0)
+        assert (
+            self.command.stderr.read()
+            == 'Ignoring likely obsolete or already migrated 123456/bar.xpi'
+        )
+
+    def test_migrate_file_already_exists(self):
+        file_ = addon_factory(
+            pk=123456,
+            slug='someaddon',
+            version_kw={'version': '42.0'},
+            file_kw={'is_signed': True},
+        ).current_version.file
+        file_.update(file='bar.xpi')
+        with mock.patch(
+            'olympia.files.management.commands.migrate_files_to_new_structure.os.link'
+        ) as mocked_link:
+            mocked_link.side_effect = FileExistsError
+            self.command.migrate_file('123456', 'bar.xpi')
+        expected_source_path = os.path.join(settings.ADDONS_PATH, '123456', 'bar.xpi')
+        expected_target_path = os.path.join(
+            settings.ADDONS_PATH, '56/3456/123456', 'someaddon-42.0.xpi'
+        )
+        assert mocked_link.call_count == 1
+        assert mocked_link.call_args == ((expected_source_path, expected_target_path),)
+        # We ignored the error and proceeded as normal (file had already been
+        # migrated on the filesystem but database had not been updated yet, now
+        # it should be).
+        file_.reload()
+        assert file_.file.path == expected_target_path
+        self.command.stderr.seek(0)
+        assert self.command.stderr.read() == 'Ignoring already migrated 123456/bar.xpi'
+
+    def test_migrate_directory_contents(self):
+        addon_path = os.path.join(settings.ADDONS_PATH, '4815162342')
+        os.makedirs(addon_path)
+        file_path = os.path.join(addon_path, 'bar.xpi')
+        with open(file_path, 'w') as f:
+            f.write('a')
+        with mock.patch.object(self.command, 'migrate_file') as migrate_file_mock:
+            self.command.migrate_directory_contents('4815162342')
+        assert migrate_file_mock.call_count == 1
+        assert migrate_file_mock.call_args[0] == ('4815162342', 'bar.xpi')
+
+    def test_migrate_directory_contents_empty_dir(self):
+        addon_path = os.path.join(settings.ADDONS_PATH, '4815162342')
+        os.makedirs(addon_path)
+        with mock.patch.object(self.command, 'migrate_file') as migrate_file_mock:
+            self.command.migrate_directory_contents('4815162342')
+        assert migrate_file_mock.call_count == 0
+
+    @mock.patch(
+        'olympia.files.management.commands.migrate_files_to_new_structure.os.scandir'
+    )
+    def test_migrate(self, scandir_mock):
+        # Return 2001 addon directories and a couple additional entries that should be
+        # ignored.
+        scandir_mock.return_value = [
+            mock.Mock(
+                spec=os.DirEntry,
+                _name=str(x),
+                path=os.path.join(settings.ADDONS_PATH, str(x)),
+            )
+            for x in range(1, 2002)
+        ] + [
+            mock.Mock(
+                spec=os.DirEntry,
+                _name='.123456',
+                path=os.path.join(settings.ADDONS_PATH, '.123456'),
+            ),
+            mock.Mock(
+                spec=os.DirEntry,
+                _name='temp',
+                path=os.path.join(settings.ADDONS_PATH, 'temp'),
+            ),
+        ]
+        for mocked in scandir_mock.return_value:
+            mocked.name = (
+                mocked._name
+            )  # Ugly but `name` is a special attribute in mocks.
+        with mock.patch.object(
+            self.command, 'migrate_directory_contents'
+        ) as migrate_directory_contents_mock:
+            migrate_directory_contents_mock.return_value = 1
+            self.command.migrate()
+        # We're mocking migrate_directory_contents() so scandir() should have
+        # been called only once.
+        assert scandir_mock.call_count == 1
+        assert scandir_mock.call_args == ((settings.ADDONS_PATH,),)
+        assert migrate_directory_contents_mock.call_count == 2001
+        assert migrate_directory_contents_mock.call_args_list[23] == (('24',),)
+        self.command.stdout.seek(0)
+        # We should have printed the ETA 3 times and added a summary in the 4th
+        # line.
+        assert len(self.command.stdout.read().strip().split('\n')) == 4
+
+    @mock.patch(
+        'olympia.files.management.commands.migrate_files_to_new_structure.os.scandir'
+    )
+    def test_migrate_no_entries(self, scandir_mock):
+        scandir_mock.return_value = []
+        self.command.migrate()
+        self.command.stdout.seek(0)
+        # Nothing was processed: no ETA, but the summary must still be printed.
+        assert self.command.stdout.read().startswith(
+            'Processed 0 entries (migrated 0) in '
+        )
+
+
 class TestMigrateGuardedAddons(TestCase):
     def setUp(self):
         self.command = MigrateGuardedAddons()