move the actual mlbf upload to a celery task

This commit is contained in:
Andrew Williamson 2020-04-06 12:01:35 +01:00
Родитель 02a1adce9e
Коммит bc039ac584
8 изменённых файлов: 63 добавлений и 28 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -42,6 +42,7 @@ static/js/i18n/*.js
static/js/node_lib/*
storage/files/*
storage/git-storage/*
storage/mlbf/*
storage/guarded-addons/*
storage/shared_storage/*
supervisord.pid

Просмотреть файл

@ -171,6 +171,7 @@ def test_pre_setup(request, tmpdir, settings):
settings.ADDONS_PATH = _path(storage_root, 'files')
settings.GUARDED_ADDONS_PATH = _path(storage_root, 'guarded-addons')
settings.GIT_FILE_STORAGE_PATH = _path(storage_root, 'git-storage')
settings.MLBF_STORAGE_PATH = _path(storage_root, 'mlbf')
settings.MEDIA_ROOT = _path(shared_storage, 'uploads')
settings.TMP_PATH = _path(shared_storage, 'tmp')

Просмотреть файл

@ -1,21 +1,21 @@
import json
import tempfile
import os
import time
from django.conf import settings
from django.core.files.storage import default_storage as storage
import waffle
import olympia.core.logger
from olympia.lib.kinto import KintoServer
from olympia.zadmin.models import get_config, set_config
from olympia.zadmin.models import get_config
from .mlbf import generate_mlbf, MLBF_KEY_FORMAT
from . import tasks
from .mlbf import generate_mlbf
from .models import Block
from .utils import KINTO_BUCKET, KINTO_COLLECTION_MLBF
log = olympia.core.logger.getLogger('z.cron')
MLBF_TIME_CONFIG_KEY = 'blocklist_mlbf_generation_time'
def _get_blocklist_last_modified_time():
latest_block = Block.objects.order_by('-modified').first()
@ -26,15 +26,14 @@ def upload_mlbf_to_kinto():
if not waffle.switch_is_active('blocklist_mlbf_submit'):
log.info('Upload MLBF to kinto cron job disabled.')
return
last_generation_time = get_config(MLBF_TIME_CONFIG_KEY, 0, json_value=True)
last_generation_time = get_config(
tasks.MLBF_TIME_CONFIG_KEY, 0, json_value=True)
if last_generation_time > _get_blocklist_last_modified_time():
log.info(
'No new/modified Blocks in database; skipping MLBF generation')
return
log.info('Starting Upload MLBF to kinto cron job.')
server = KintoServer(
KINTO_BUCKET, KINTO_COLLECTION_MLBF, kinto_sign_off_needed=False)
stats = {}
# This timestamp represents the point in time when all previous addon
@ -46,15 +45,9 @@ def upload_mlbf_to_kinto():
# https://github.com/mozilla/addons-server/issues/13695
generation_time = int(time.time() * 1000)
bloomfilter = generate_mlbf(stats)
with tempfile.NamedTemporaryFile() as filter_file:
mlbf_path = os.path.join(
settings.MLBF_STORAGE_PATH, f'{generation_time}.filter')
with storage.open(mlbf_path, 'wb') as filter_file:
bloomfilter.tofile(filter_file)
filter_file.seek(0)
data = {
'key_format': MLBF_KEY_FORMAT,
'generation_time': generation_time,
}
attachment = ('filter.bin', filter_file, 'application/octet-stream')
server.publish_attachment(data, attachment)
server.complete_session()
set_config(MLBF_TIME_CONFIG_KEY, generation_time, json_value=True)
tasks.upload_mlbf_to_kinto.delay(generation_time)
log.info(json.dumps(stats))

Просмотреть файл

@ -41,7 +41,7 @@ class Command(BaseCommand):
return [tuple(record) for record in data]
def save_blocklist(self, stats, mlbf, id_):
out_file = os.path.join(settings.TMP_PATH, 'mlbf', id_, 'filter')
out_file = os.path.join(settings.MLBF_STORAGE_PATH, f'{id_}.filter')
os.makedirs(os.path.dirname(out_file), exist_ok=True)
with default_storage.open(out_file, 'wb') as mlbf_file:

Просмотреть файл

@ -1,8 +1,10 @@
import os
import re
import time
from datetime import datetime
from django.conf import settings
from django.core.files.storage import default_storage as storage
from django.db import transaction
import olympia.core.logger
@ -11,10 +13,15 @@ from olympia.addons.models import Addon
from olympia.amo.celery import task
from olympia.amo.decorators import use_primary_db
from olympia.files.models import File
from olympia.lib.kinto import KintoServer
from olympia.users.utils import get_task_user
from olympia.zadmin.models import set_config
from .mlbf import MLBF_KEY_FORMAT
from .models import Block, BlocklistSubmission, KintoImport
from .utils import block_activity_log_save, split_regex_to_list
from .utils import (
block_activity_log_save, KINTO_BUCKET, KINTO_COLLECTION_MLBF,
split_regex_to_list)
log = olympia.core.logger.getLogger('z.amo.blocklist')
@ -22,6 +29,8 @@ log = olympia.core.logger.getLogger('z.amo.blocklist')
bracket_open_regex = re.compile(r'(?<!\\){')
bracket_close_regex = re.compile(r'(?<!\\)}')
MLBF_TIME_CONFIG_KEY = 'blocklist_mlbf_generation_time'
@task
@use_primary_db
@ -130,3 +139,20 @@ def import_block_from_blocklist(record):
kinto_import.outcome = KintoImport.OUTCOME_NOMATCH
log.debug('Kinto %s: No addon found', kinto_id)
kinto_import.save()
# Task that publishes the bloomfilter file stored for `generation_time` to
# kinto and then records that generation time in the config.
# `generation_time` is used both as record metadata and to locate the file.
@task
def upload_mlbf_to_kinto(generation_time):
server = KintoServer(
KINTO_BUCKET, KINTO_COLLECTION_MLBF, kinto_sign_off_needed=False)
# Record data published together with the attachment.
data = {
'key_format': MLBF_KEY_FORMAT,
'generation_time': generation_time,
}
# The filter is read from `<generation_time>.filter` under
# settings.MLBF_STORAGE_PATH, so that file must already exist when this runs.
mlbf_path = os.path.join(
settings.MLBF_STORAGE_PATH, f'{generation_time}.filter')
with storage.open(mlbf_path) as filter_file:
# Attachment tuple: ('filter.bin', open file object, content type).
attachment = ('filter.bin', filter_file, 'application/octet-stream')
server.publish_attachment(data, attachment)
server.complete_session()
# Store the generation time under MLBF_TIME_CONFIG_KEY; this statement comes
# after the publish and complete_session calls.
set_config(MLBF_TIME_CONFIG_KEY, generation_time, json_value=True)

Просмотреть файл

@ -276,5 +276,5 @@ class TestExportBlocklist(TestCase):
updated_by=user_factory())
call_command('export_blocklist', '1')
out_path = os.path.join(settings.TMP_PATH, 'mlbf', '1')
assert os.path.exists(os.path.join(out_path, 'filter'))
out_path = os.path.join(settings.MLBF_STORAGE_PATH, '1.filter')
assert os.path.exists(out_path)

Просмотреть файл

@ -1,13 +1,17 @@
import datetime
import os
from unittest import mock
from django.conf import settings
from freezegun import freeze_time
from waffle.testutils import override_switch
from olympia.amo.tests import addon_factory, TestCase, user_factory
from olympia.blocklist.cron import MLBF_TIME_CONFIG_KEY, upload_mlbf_to_kinto
from olympia.blocklist.cron import upload_mlbf_to_kinto
from olympia.blocklist.mlbf import MLBF_KEY_FORMAT
from olympia.blocklist.models import Block
from olympia.blocklist.tasks import MLBF_TIME_CONFIG_KEY
from olympia.lib.kinto import KintoServer
from olympia.zadmin.models import get_config, set_config
@ -26,14 +30,20 @@ class TestUploadToKinto(TestCase):
def test_upload_mlbf_to_kinto(self, publish_mock):
upload_mlbf_to_kinto()
generation_time = int(
datetime.datetime(2020, 1, 1, 12, 34, 56).timestamp() * 1000)
publish_mock.assert_called_with(
{'key_format': MLBF_KEY_FORMAT,
'generation_time':
datetime.datetime(2020, 1, 1, 12, 34, 56).timestamp() * 1000},
'generation_time': generation_time},
('filter.bin', mock.ANY, 'application/octet-stream'))
assert (
get_config(MLBF_TIME_CONFIG_KEY, json_value=True) ==
int(datetime.datetime(2020, 1, 1, 12, 34, 56).timestamp() * 1000))
generation_time)
mlfb_path = os.path.join(
settings.MLBF_STORAGE_PATH, f'{generation_time}.filter')
assert os.path.exists(mlfb_path)
assert os.path.getsize(mlfb_path)
@override_switch('blocklist_mlbf_submit', active=False)
@mock.patch.object(KintoServer, 'publish_attachment')

Просмотреть файл

@ -1104,6 +1104,9 @@ CELERY_TASK_ROUTES = {
'olympia.blocklist.tasks.import_block_from_blocklist': {
'queue': 'priority'
},
'olympia.blocklist.tasks.upload_mlbf_to_kinto': {
'queue': 'priority'
},
'olympia.versions.tasks.generate_static_theme_preview': {
'queue': 'priority'
},
@ -1685,6 +1688,7 @@ STORAGE_ROOT = env('NETAPP_STORAGE_ROOT', default=path('storage'))
ADDONS_PATH = os.path.join(STORAGE_ROOT, 'files')
GUARDED_ADDONS_PATH = os.path.join(STORAGE_ROOT, 'guarded-addons')
GIT_FILE_STORAGE_PATH = os.path.join(STORAGE_ROOT, 'git-storage')
MLBF_STORAGE_PATH = os.path.join(STORAGE_ROOT, 'mlbf')
SHARED_STORAGE = os.path.join(STORAGE_ROOT, 'shared_storage')