From c408995d6ed8e6608bb936b338a0f359f7392034 Mon Sep 17 00:00:00 2001
From: Andrew Williamson
Date: Tue, 14 Feb 2023 11:47:23 +0000
Subject: [PATCH] command to return folders with (potentially) changed files instead (#20329)

* command to return folders with (potentially) changed files instead

* drop xpi uploads and sitemaps from get_changed_files

* use models to get directory paths

* codestyle fix

* change collect functions to use qs.only and iterators

* changed collect_files and collect_sources back to manually building paths

* codestyle again

* add a test for Versions|Files without sources|files
---
 .../management/commands/get_changed_files.py  | 144 +++++++++++++
 src/olympia/amo/tests/test_commands.py        | 190 +++++++++++++++++-
 2 files changed, 333 insertions(+), 1 deletion(-)
 create mode 100644 src/olympia/amo/management/commands/get_changed_files.py

diff --git a/src/olympia/amo/management/commands/get_changed_files.py b/src/olympia/amo/management/commands/get_changed_files.py
new file mode 100644
index 0000000000..6fb453a357
--- /dev/null
+++ b/src/olympia/amo/management/commands/get_changed_files.py
@@ -0,0 +1,144 @@
+import os
+
+from datetime import datetime, timedelta
+from os import scandir
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+
+from olympia.addons.models import Addon, Preview
+from olympia.amo.utils import id_to_path
+from olympia.blocklist.utils import datetime_to_ts
+from olympia.files.models import File
+from olympia.git.utils import AddonGitRepository
+from olympia.hero.models import PrimaryHeroImage
+from olympia.users.models import UserProfile
+from olympia.versions.models import Version, VersionPreview
+
+
+def collect_user_pics(since):
+    """Return the picture directory of every user modified after `since`."""
+    qs = UserProfile.objects.filter(modified__gt=since).only('id', 'username')
+    return [user.picture_dir for user in qs.iterator()]
+
+
+def collect_files(since):
+    """Return the unique add-on directories of files modified after `since`."""
+    path = settings.ADDONS_PATH
+    id_iter = (
+        File.objects.filter(modified__gt=since)
+        .values_list('version__addon_id', flat=True)
+        .iterator()
+    )
+    return list({os.path.join(path, id_to_path(id_, breadth=2)) for id_ in id_iter})
+
+
+def collect_sources(since):
+    """Return the source directory of every version modified after `since`."""
+    path = os.path.join(settings.MEDIA_ROOT, 'version_source')
+    # Stream the ids like the other collectors do, rather than letting the
+    # queryset cache every row before the list is built.
+    id_iter = (
+        Version.unfiltered.filter(modified__gt=since)
+        .values_list('id', flat=True)
+        .iterator()
+    )
+    return [os.path.join(path, id_to_path(id_, breadth=1)) for id_ in id_iter]
+
+
+def _get_previews(since, PreviewModel):
+    """Return the unique preview directories of rows created after `since`."""
+    out = set()
+    qs = PreviewModel.objects.filter(created__gt=since).only('id', 'sizes')
+    for preview in qs.iterator():
+        # Grow the set in place; `out = out | {...}` copied it for every row.
+        out |= {
+            os.path.dirname(preview.thumbnail_path),
+            os.path.dirname(preview.image_path),
+            os.path.dirname(preview.original_path),
+        }
+    return list(out)
+
+
+def collect_addon_previews(since):
+    """Return the directories of add-on previews created after `since`."""
+    return _get_previews(since, Preview)
+
+
+def collect_theme_previews(since):
+    """Return the directories of version previews created after `since`."""
+    return _get_previews(since, VersionPreview)
+
+
+def collect_addon_icons(since):
+    """Return the unique icon directories of add-ons modified after `since`."""
+    qs = Addon.unfiltered.filter(modified__gt=since).only('id')
+    return list({addon.get_icon_dir() for addon in qs.iterator()})
+
+
+def collect_editoral(since):
+    """Return the hero image root if any hero image was modified after `since`."""
+    return (
+        [os.path.join(settings.MEDIA_ROOT, 'hero-featured-image')]
+        if PrimaryHeroImage.objects.filter(modified__gt=since).exists()
+        else []
+    )
+
+
+def collect_git(since):
+    """Return the unique git repository paths for files modified after `since`."""
+    qs_iter = (
+        File.objects.filter(modified__gt=since)
+        .values_list('version__addon_id', flat=True)
+        .iterator()
+    )
+    return list(
+        {AddonGitRepository(addon_id).git_repository_path for addon_id in qs_iter}
+    )
+
+
+def collect_blocklist(since):
+    """Return numerically named MLBF_STORAGE_PATH dirs not older than `since`."""
+    path = settings.MLBF_STORAGE_PATH
+    since_ts = datetime_to_ts(since)
+    return [
+        file_.path
+        for file_ in scandir(path)
+        if file_.is_dir() and file_.name.isdigit() and int(file_.name) >= since_ts
+    ]
+
+
+class Command(BaseCommand):
+    help = (
+        'Get folders containing files that have changed on the filesystem in the past '
+        'X seconds'
+    )
+
+    def add_arguments(self, parser):
+        """Register the positional `since` argument."""
+        parser.add_argument(
+            'since', type=int, help='How many seconds back to look for changes'
+        )
+
+    def get_collectors(self):
+        """Return the collector functions in the order their output is written."""
+        return [
+            collect_user_pics,
+            collect_files,
+            collect_sources,
+            collect_addon_previews,
+            collect_theme_previews,
+            collect_addon_icons,
+            collect_editoral,
+            collect_git,
+            collect_blocklist,
+        ]
+
+    def handle(self, *args, **options):
+        """Write the normalised path of every collected folder to stdout."""
+        since = datetime.now() - timedelta(seconds=options['since'])
+        for func in self.get_collectors():
+            # A plain loop: the writes are side effects, so there is no point
+            # building a throwaway list of their return values.
+            for item in func(since):
+                self.stdout.write(os.path.normpath(item))
diff --git a/src/olympia/amo/tests/test_commands.py b/src/olympia/amo/tests/test_commands.py
index 8a1f94ec6a..0cacf55af2 100644
--- a/src/olympia/amo/tests/test_commands.py
+++ b/src/olympia/amo/tests/test_commands.py
@@ -1,6 +1,6 @@
 import os
 import io
-
+from datetime import datetime, timedelta
 from importlib import import_module
 
 from django.conf import settings
@@ -11,6 +11,26 @@ from django.test.utils import override_settings
 from unittest import mock
 import pytest
 
+from olympia.addons.models import Preview
+from olympia.amo.management.commands.get_changed_files import (
+    collect_user_pics,
+    collect_files,
+    collect_sources,
+    collect_addon_previews,
+    collect_theme_previews,
+    collect_addon_icons,
+    collect_editoral,
+    collect_git,
+    collect_blocklist,
+)
+from olympia.amo.tests import addon_factory, TestCase, user_factory, version_factory
+from olympia.amo.utils import id_to_path
+from olympia.blocklist.utils import datetime_to_ts
+from olympia.files.models import File, files_upload_to_callback
+from olympia.git.utils import AddonGitRepository
+from olympia.hero.models import PrimaryHeroImage
+from olympia.versions.models import source_upload_path, VersionPreview
+
 
 def sample_cron_job(*args):
     pass
@@ -144,3 +164,171 @@ def test_generate_jsi18n_files():
     with open(filename) as f:
         content = f.read()
     assert 'Erreur' in content
+
+
+class TestGetChangedFilesCommand(TestCase):
+    fixtures = ['base/addon_5299_gcal']
+
+    def setUp(self):
+        self.yesterday = datetime.now() - timedelta(hours=24)
+        self.newer = self.yesterday + timedelta(seconds=10)
+        self.older = self.yesterday - timedelta(seconds=10)
+
+    def test_command(self):
+        user = user_factory()
+        PrimaryHeroImage.objects.create()
+
+        with io.StringIO() as out:
+            call_command('get_changed_files', '1', stdout=out)
+            assert out.getvalue() == (
+                f'{user.picture_dir}\n'
+                f'{os.path.join(settings.MEDIA_ROOT, "hero-featured-image")}\n'
+            )
+
+    def test_collect_user_pics(self):
+        changed = user_factory()
+        unchanged = user_factory()
+        unchanged.update(modified=self.older)
+        assert unchanged.modified < self.yesterday
+        with self.assertNumQueries(1):
+            assert collect_user_pics(self.yesterday) == [changed.picture_dir]
+
+    def test_collect_files(self):
+        new_file = File.objects.get(id=33046)
+        new_file.update(modified=self.newer)
+        version_factory(
+            addon=new_file.addon,
+            file_kw={'file': files_upload_to_callback(new_file, 'foo.xpi')},
+        )  # an extra file to check de-duping
+        old_file = addon_factory().current_version.file
+        old_file.update(modified=self.older)
+        version_factory(addon=new_file.addon, file_kw={'file': None})  # no file
+        assert old_file.modified < self.yesterday
+        with self.assertNumQueries(1):
+            assert collect_files(self.yesterday) == [
+                os.path.dirname(new_file.file.path)
+            ]
+
+    def test_collect_sources(self):
+        changed = addon_factory().current_version
+        changed.update(source=source_upload_path(changed, 'foo.zip'))
+        unchanged = addon_factory().current_version
+        unchanged.update(modified=self.older)
+        no_source_version = version_factory(addon=changed.addon, source=None)
+        assert unchanged.modified < self.yesterday
+        with self.assertNumQueries(1):
+            assert collect_sources(self.yesterday) == [
+                os.path.join(
+                    settings.MEDIA_ROOT,
+                    'version_source',
+                    id_to_path(no_source_version.id),
+                ),
+                os.path.dirname(changed.source.path),
+            ]
+
+    def test_collect_addon_previews(self):
+        preview1 = Preview.objects.create(addon=addon_factory())
+        preview2 = Preview.objects.create(addon=addon_factory())
+        older_preview = Preview.objects.create(
+            addon=addon_factory(), id=preview1.id + 1000
+        )
+        older_preview.update(created=self.older)
+        assert (preview1.id // 1000) == (preview2.id // 1000)
+        assert (preview1.id // 1000) != (older_preview.id // 1000)
+        assert os.path.dirname(preview1.image_path) == os.path.dirname(
+            preview2.image_path
+        )
+        with self.assertNumQueries(1):
+            assert sorted(collect_addon_previews(self.yesterday)) == [
+                # only one set of dirs because 1 and 2 are in same subdirs
+                os.path.dirname(preview1.image_path),
+                os.path.dirname(preview1.original_path),
+                os.path.dirname(preview1.thumbnail_path),
+            ]
+
+    def test_collect_theme_previews(self):
+        preview1 = VersionPreview.objects.create(
+            version=addon_factory().current_version
+        )
+        preview2 = VersionPreview.objects.create(
+            version=addon_factory().current_version
+        )
+        older_preview = VersionPreview.objects.create(
+            version=addon_factory().current_version, id=preview1.id + 1000
+        )
+        older_preview.update(created=self.older)
+        assert (preview1.id // 1000) == (preview2.id // 1000)
+        assert (preview1.id // 1000) != (older_preview.id // 1000)
+        assert os.path.dirname(preview1.image_path) == os.path.dirname(
+            preview2.image_path
+        )
+        with self.assertNumQueries(1):
+            assert sorted(collect_theme_previews(self.yesterday)) == [
+                # only one set of dirs because 1 and 2 are in same subdirs
+                os.path.dirname(preview1.image_path),
+                os.path.dirname(preview1.original_path),
+                os.path.dirname(preview1.thumbnail_path),
+            ]
+
+    def test_collect_addon_icons(self):
+        changed = addon_factory()
+        unchanged = addon_factory()
+        unchanged.update(modified=self.older)
+        assert unchanged.modified < self.yesterday
+        with self.assertNumQueries(1):
+            assert collect_addon_icons(self.yesterday) == [changed.get_icon_dir()]
+
+    def test_collect_editoral(self):
+        image1 = PrimaryHeroImage.objects.create()
+        image1.update(modified=self.older)
+        image2 = PrimaryHeroImage.objects.create()
+        image2.update(modified=self.older)
+        # no new hero images so no dir
+        assert collect_editoral(self.yesterday) == []
+        image1.update(modified=self.newer)
+        image2.update(modified=self.newer)
+        # one or more updated hero images match then the root should be returned
+        with self.assertNumQueries(1):
+            assert collect_editoral(self.yesterday) == [
+                os.path.join(settings.MEDIA_ROOT, 'hero-featured-image')
+            ]
+
+    def test_collect_git(self):
+        new_file = File.objects.get(id=33046)
+        new_file.update(modified=self.newer)
+        version_factory(addon=new_file.addon)  # an extra file to check de-duping
+        old_file = addon_factory().current_version.file
+        old_file.update(modified=self.older)
+        assert old_file.modified < self.yesterday
+        with self.assertNumQueries(1):
+            assert collect_git(self.yesterday) == [
+                AddonGitRepository(new_file.addon).git_repository_path
+            ]
+
+    def test_collect_blocklist(self):
+        class FakeEntry:
+            def __init__(self, name, is_dir=True):
+                self.name = str(name)
+                self._is_dir = is_dir
+
+            def is_dir(self):
+                return self._is_dir
+
+            @property
+            def path(self):
+                return f'foo/{self.name}'
+
+        newerer = self.newer + timedelta(seconds=10)
+        with mock.patch(
+            'olympia.amo.management.commands.get_changed_files.scandir'
+        ) as scandir_mock:
+            scandir_mock.return_value = [
+                FakeEntry('fooo'),  # not a datetime
+                FakeEntry(datetime_to_ts(self.older)),  # too old
+                FakeEntry(datetime_to_ts(self.newer), False),  # not a dir
+                FakeEntry(datetime_to_ts(newerer)),  # yes
+            ]
+            with self.assertNumQueries(0):
+                assert collect_blocklist(self.yesterday) == [
+                    f'foo/{datetime_to_ts(newerer)}'
+                ]