- Add TagsTranslationsTool
- Add TagsStatsTool
- Add base classes for TagsTool utils
- Add tests for tags data utils
- Adjust/add fixtures for tags_tool tests
- Add index_together on Translation date, locale
Ryan Northey 2018-02-08 06:45:43 +00:00
Parent 3f6d58dacb
Commit eb4fd72aa4
11 changed files with 778 additions and 9 deletions


@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.8 on 2018-02-09 22:52
from __future__ import unicode_literals
from django.conf import settings
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('base', '0114_project_path_unique'),
]
operations = [
migrations.AlterIndexTogether(
name='translation',
index_together=set([('entity', 'locale', 'approved'), ('entity', 'user'), ('entity', 'locale', 'fuzzy'), ('date', 'locale')]),
),
]


@@ -2383,7 +2383,8 @@ class Translation(DirtyFieldsMixin, models.Model):
             ('entity', 'user'),
             ('entity', 'locale', 'approved'),
             ('entity', 'locale', 'fuzzy'),
-            ('locale', 'user', 'entity'))
+            ('locale', 'user', 'entity'),
+            ('date', 'locale'))

     @classmethod
     def for_locale_project_paths(self, locale, project, paths):

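The new ('date', 'locale') entry in index_together serves lookups that constrain both columns at once. A minimal sketch of that access pattern, assuming a configured Django environment; the helper name is invented, but the filter mirrors the date/locale lookup used by TagsLatestTranslationsTool.get_data() later in this commit:

from pontoon.base.models import Translation

def translations_changed_at(locale, change_date):
    # hypothetical helper, not part of the commit: an exact
    # (date, locale) filter is what the composite index covers
    return Translation.objects.filter(
        date=change_date,
        locale=locale,
    ).values('string', 'date', 'user__email')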

@@ -0,0 +1,8 @@
from .stats import TagsStatsTool
from .translations import TagsLatestTranslationsTool
__all__ = (
'TagsStatsTool',
'TagsLatestTranslationsTool')

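For orientation, a hedged sketch of how the two exported tools are used; the keyword arguments come from the clone_kwargs declared in pontoon/tags/utils/base.py below, and the slug value here is invented:

from pontoon.tags.utils import TagsLatestTranslationsTool, TagsStatsTool

# slug accepts a glob, which the tools convert with glob_to_regex
stats = TagsStatsTool(slug='browser*')
latest = TagsLatestTranslationsTool(slug='browser*')

# iterating a stats tool evaluates and caches its aggregated data
for row in stats:
    print(row['slug'], row['approved_strings'], row['total_strings'])

# the translations tool coalesces to a dict keyed by tag (or locale) pk
last_changes = latest.data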
pontoon/tags/utils/base.py (new file, 203 lines)

@@ -0,0 +1,203 @@
from collections import OrderedDict
from django.db.models import Q
from django.utils.functional import cached_property
from pontoon.base.models import (
Locale, Project, Resource, TranslatedResource, Translation)
from pontoon.base.utils import glob_to_regex
from pontoon.tags.models import Tag
class Clonable(object):
"""Instantiated descendants of this class can be called to create a cloned
version of the object.
The clone will be called with attributes listed in `self.clone_kwargs` as
kwargs. These can be overridden when creating the clone.
"""
clone_kwargs = ()
def __init__(self, **kwargs):
for k in self.clone_kwargs:
setattr(self, k, kwargs.get(k))
def __call__(self, **kwargs):
clone_kwargs = dict(
(k, getattr(self, k))
for k
in self.clone_kwargs)
clone_kwargs.update(kwargs)
return self.__class__(**clone_kwargs)
class FilteredDataTool(Clonable):
"""Base Tool for constructing and coalescing aggregating querysets
Descendants of this class will filter a queryset by mapping
self.filter_methods to methods on the class
The data is aggregated and then cached/coalesced to the
data property
It can be cloned to override filtering params
"""
default_groupby = ()
_default_annotations = ()
def __iter__(self):
return iter(self.data)
def __len__(self):
return len(self.data)
@property
def data_manager(self):
"""Entry table through which the query is constructed"""
raise NotImplementedError()
@property
def default_annotations(self):
return OrderedDict(self._default_annotations)
@property
def filtered_data(self):
"""Queryset after applying filter methods"""
data = self.data_manager.all()
for tag_filter in self.filters:
data = tag_filter(data)
return data
@property
def filters(self):
return [
getattr(self, 'filter_%s' % f)
for f
in self.filter_methods]
@cached_property
def data(self):
"""Cached and coalesed copy from get_data result"""
return self.coalesce(self.get_data())
def coalesce(self, data):
"""Coalesce the queryset to python data"""
return data
def get_annotations(self):
"""Fields to aggregate"""
anno = self.default_annotations.copy()
anno.update(self.annotations or {})
return anno
def get_data(self):
"""Get the aggregated queryset"""
return self.filtered_data.values(
*self.get_groupby()).annotate(
**self.get_annotations())
def get_groupby(self):
"""Get groupby fields"""
return [self.groupby] if self.groupby else self.default_groupby
class TagsDataTool(FilteredDataTool):
"""Base Data Tool for retrieving Tag data
This class has the various Pontoon object managers as properties, which
allows the managers to be overridden (theoretically) in a descendant class
"""
_default_annotations = ()
default_groupby = ('resource__tag', )
filter_methods = (
'tag', 'locales', 'projects')
clone_kwargs = (
'locales',
'projects',
'priority',
'slug',
'path')
@property
def locale_manager(self):
return Locale.objects
@property
def project_manager(self):
return Project.objects
@property
def resource_manager(self):
return Resource.objects
@property
def tag_manager(self):
return Tag.objects
@property
def translation_manager(self):
return Translation.objects
@property
def tr_manager(self):
return TranslatedResource.objects
class TagsTRTool(TagsDataTool):
"""Data Tool from the perspective of TranslatedResources
"""
clone_kwargs = TagsDataTool.clone_kwargs + ('annotations', 'groupby')
@property
def data_manager(self):
return self.tr_manager
def filter_locales(self, trs):
return (
trs.filter(locale__in=self.locales)
if self.locales
else trs)
def filter_path(self, trs):
return (
trs.filter(
resource__path__regex=glob_to_regex(self.path)).distinct()
if self.path
else trs)
def filter_projects(self, trs):
return (
trs.filter(resource__project__in=self.projects)
if self.projects
else trs)
def filter_tag(self, trs):
"""Filters on tag.slug and tag.priority
"""
q = Q()
if not self.slug:
# if slug is not specified, then just remove all resources
# that have no tag
q &= ~Q(resource__tag__isnull=True)
if self.slug:
q &= Q(resource__tag__slug__regex=glob_to_regex(self.slug))
if self.priority is not None:
if self.priority is False:
# if priority is False, exclude tags with priority
q &= Q(resource__tag__priority__isnull=True)
elif self.priority is True:
# if priority is True show only tags with priority
q &= Q(resource__tag__priority__isnull=False)
elif isinstance(self.priority, int):
# if priority is an int, filter on that priority
q &= Q(resource__tag__priority=self.priority)
return trs.filter(q)

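A short illustration of the cloning behaviour described in the Clonable docstring above, and of the priority semantics implemented in filter_tag, using one of the concrete tools from this commit; the slug and groupby values are invented:

from pontoon.tags.utils import TagsStatsTool

# attributes named in clone_kwargs are copied onto the clone unless
# overridden in the call, so a configured tool can be re-scoped cheaply
stats = TagsStatsTool(slug='browser*')
by_locale = stats(groupby='locale')

assert by_locale.slug == stats.slug == 'browser*'
assert by_locale.groupby == 'locale'
assert stats.groupby is None

# priority=True keeps only tags with a priority set, priority=False
# keeps only tags without one, and an int matches that priority exactly
prioritised = stats(priority=True)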

@@ -0,0 +1,68 @@
from django.db.models import F, Sum, Value
from django.db.models.functions import Coalesce
from .base import TagsTRTool
class TagsStatsTool(TagsTRTool):
"""Creates aggregated stat data for tags according to
filters
"""
coalesce = list
filter_methods = (
'tag', 'projects', 'locales', 'path')
# from the perspective of translated resources
_default_annotations = (
('total_strings', Coalesce(
Sum('resource__total_strings'),
Value(0))),
('fuzzy_strings', Coalesce(
Sum('fuzzy_strings'),
Value(0))),
('approved_strings', Coalesce(
Sum('approved_strings'),
Value(0))),
('translated_strings', Coalesce(
Sum('translated_strings'),
Value(0))))
def get_data(self):
"""Stats can be generated either grouping by tag or by locale
Once the tags/locales are found a second query is made to get
their data
"""
if self.get_groupby()[0] == 'resource__tag':
stats = {
stat['resource__tag']: stat
for stat
in super(TagsStatsTool, self).get_data()}
# get the found tags as values
tags = self.tag_manager.filter(pk__in=stats.keys())
tags = tags.values('pk', 'slug', 'name', 'priority', 'project')
tags = tags.annotate(resource__tag=F('pk'))
for tag in tags:
# update the stats with tag data
tag.update(stats[tag['pk']])
return tags
elif self.get_groupby()[0] == 'locale':
result = list(super(TagsStatsTool, self).get_data())
# get the found locales as values
locales = {
loc['pk']: loc
for loc
in self.locale_manager.filter(
pk__in=(
r['locale']
for r
in result)).values('pk', 'name', 'code')}
for r in result:
# update the stats with locale data
r.update(locales[r['locale']])
return sorted(result, key=lambda r: r['name'])

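Roughly what the coalesced stats look like for the two groupings described in get_data above, assuming a configured Django environment; the field names follow the values()/annotate() calls, while the numbers and slugs are invented:

from pontoon.tags.utils import TagsStatsTool

# grouped by tag (the default): one dict per matching Tag
tag_rows = TagsStatsTool(slug='browser*').data
# [{'pk': 1, 'slug': 'browser-ui', 'name': 'Browser UI', 'priority': 2,
#   'project': None, 'resource__tag': 1, 'total_strings': 120,
#   'approved_strings': 80, 'translated_strings': 10, 'fuzzy_strings': 5}]

# grouped by locale: one dict per Locale, sorted by locale name
locale_rows = TagsStatsTool(slug='browser-ui', groupby='locale').data
# [{'pk': 7, 'name': 'Czech', 'code': 'cs', 'locale': 7,
#   'total_strings': 40, 'approved_strings': 30,
#   'translated_strings': 4, 'fuzzy_strings': 1}]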

@@ -0,0 +1,51 @@
from django.db.models import Max, Q
from .base import TagsTRTool
class TagsLatestTranslationsTool(TagsTRTool):
"""For given filters this tool will find the latest ``Translations``
for a ``Tag``. It uses TranslatedResources to find the translations
but returns translations.
"""
filter_methods = ('tag', 'projects', 'latest', 'locales', 'path')
_default_annotations = (
('last_change', Max('latest_translation__date')), )
@property
def groupby_prefix(self):
# the latest translations are found via translated_resources and then
# used to retrieve the translations themselves, so the groupby field
# needs to be mapped to the Translation-side lookup here
groupby = list(self.get_groupby())
if groupby == ['resource__tag']:
return "entity__resource__tag"
elif groupby == ['locale']:
return "locale"
def coalesce(self, data):
return {
translation[self.groupby_prefix]: translation
for translation
in data.iterator()}
def get_data(self):
_translations = self.translation_manager.none()
stats = super(TagsLatestTranslationsTool, self).get_data()
for tr in stats.iterator():
# find translations with matching date and tag/locale
_translations |= self.translation_manager.filter(
Q(**{'date': tr["last_change"],
self.groupby_prefix: tr[self.get_groupby()[0]]}))
return _translations.values(
*('string',
'date',
'approved_date',
'user__first_name',
'user__email')
+ (self.groupby_prefix, ))
def filter_latest(self, qs):
return qs.exclude(latest_translation__isnull=True)

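And a sketch of the shape the latest-translations tool returns: coalesce keys each Translation's values() dict by the mapped groupby field (tag pk by default, locale pk when groupby='locale'); the keys and values below are invented:

from pontoon.tags.utils import TagsLatestTranslationsTool

last_changes = TagsLatestTranslationsTool(slug='browser*').data
# {1: {'string': 'Some translation', 'date': <datetime>,
#      'approved_date': None, 'user__first_name': 'Translator',
#      'user__email': 'translator@example.com',
#      'entity__resource__tag': 1},
#  2: {...}}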
tests/fixtures/tags.py (vendored, 49 lines changed)

@@ -155,13 +155,17 @@ def _calculate_tags(**kwargs):
     attrs = ['total_strings'] + attrs
     # iterate through associated tags for all matching translated resources
     for tr, (_pk, _slug, _name) in translated_resource_tags:
-        if _slug not in totals:
+        if kwargs.get('groupby'):
+            key = tr[kwargs['groupby']]
+        else:
+            key = _slug
+        if key not in totals:
             # create a totals[tag] with zeros for this tag
-            totals[_slug] = dict((attr, 0) for attr in attrs)
-            totals[_slug].update(dict(name=_name, pk=_pk, last_change=None))
+            totals[key] = dict((attr, 0) for attr in attrs)
+            totals[key].update(dict(name=_name, pk=_pk, last_change=None))
         for attr in attrs:
             # add the total for this translated resource to the tags total
-            totals[_slug][attr] += tr[attr]
+            totals[key][attr] += tr[attr]
     return totals
@@ -194,13 +198,17 @@ def _calculate_tags_latest(**kwargs):
     translation_tags = _tag_iterator(qs, **kwargs)
     # iterate through associated tags for all matching translations
     for translation, (tag, __, __) in translation_tags:
+        if kwargs.get('groupby'):
+            key = translation[kwargs['groupby']]
+        else:
+            key = tag
         # get the current latest for this tag
         _pk, _date = latest_dates.get(
-            tag, (None, timezone.make_aware(datetime.min)))
+            key, (None, timezone.make_aware(datetime.min)))
         if translation['date'] > _date:
             # set this translation if its newer than the current latest
             # for this tag
-            latest_dates[tag] = (translation['pk'], translation['date'])
+            latest_dates[key] = (translation['pk'], translation['date'])
     return latest_dates
@@ -320,7 +328,7 @@ def tag_test_kwargs(request, tag_matrix):
     return request.param, kwargs


-_tag_init_kwargs = OrderedDict(
+_tag_data_init_kwargs = OrderedDict(
     (('no_args',
       dict(annotations=None,
            groupby=None,
@@ -339,6 +347,33 @@ _tag_init_kwargs = OrderedDict(
            slug=7))))
+
+
+@pytest.fixture(params=_tag_data_init_kwargs)
+def tag_data_init_kwargs(request):
+    """This is a parametrized fixture that provides 2 sets
+    of possible **kwargs to instantiate the TagsDataTools with
+
+    The first set of kwargs are all set to `None`, and the
+    second contains numeric values for testing against
+    """
+    return _tag_data_init_kwargs.get(request.param).copy()
+
+
+_tag_init_kwargs = OrderedDict(
+    (('no_args',
+      dict(locales=None,
+           path=None,
+           priority=None,
+           projects=None,
+           slug=None)),
+     ('args',
+      dict(locales=1,
+           path=2,
+           priority=3,
+           projects=4,
+           slug=5))))
+
+
 @pytest.fixture(params=_tag_init_kwargs)
 def tag_init_kwargs(request):
     """This is a parametrized fixture that provides 2 sets

tests/tags/utils/base.py (new file, 56 lines)

@@ -0,0 +1,56 @@
import pytest
from pontoon.base.models import (
Locale, Project, Resource, TranslatedResource, Translation)
from pontoon.tags.models import Tag
from pontoon.tags.utils.base import Clonable, TagsDataTool
def test_util_clonable():
# tests that Clonable clones
class MockClonable(Clonable):
clone_kwargs = ('foo', 'bar')
clonable = MockClonable()
assert clonable.foo is None
assert clonable.bar is None
assert clonable().foo is None
assert clonable().bar is None
assert clonable(foo=7).foo == 7
assert clonable(bar=23).bar == 23
clonable = MockClonable(foo=7)
assert clonable.foo == 7
assert clonable.bar is None
assert clonable().foo == 7
assert clonable().bar is None
assert clonable(foo=113).foo == 113
assert clonable(foo=113).bar is None
assert clonable(bar=23).bar == 23
def test_util_tags_data_tool_managers():
# tests that the data tool has expected managers
tool = TagsDataTool()
assert tool.tag_manager == Tag.objects
assert tool.locale_manager == Locale.objects
assert tool.project_manager == Project.objects
assert tool.resource_manager == Resource.objects
assert tool.tr_manager == TranslatedResource.objects
assert tool.translation_manager == Translation.objects
def test_util_tags_data_tool_instance():
# tests that base tool does not implement a data_manager
# and that the default coalesce is to return the data
tool = TagsDataTool()
with pytest.raises(NotImplementedError):
tool.data_manager
assert tool.coalesce('X') == 'X'

tests/tags/utils/stats.py (new file, 184 lines)

@@ -0,0 +1,184 @@
import fnmatch
import pytest
from mock import MagicMock, patch, PropertyMock
from django.db.models import QuerySet
from pontoon.base.models import TranslatedResource
from pontoon.tags.utils import TagsStatsTool
def test_util_tags_stats_tool(tag_data_init_kwargs):
# tests instantiation of stats tool
kwargs = tag_data_init_kwargs
stats_tool = TagsStatsTool(**kwargs)
for k, v in kwargs.items():
assert getattr(stats_tool, k) == v
assert stats_tool.tr_manager == TranslatedResource.objects
def test_util_tags_stats_tool_annotations():
# tests annotations can be overridden
stats_tool = TagsStatsTool()
assert stats_tool.get_annotations() == stats_tool.default_annotations
anno = dict(foo="foo0", bar="bar0")
stats_tool = TagsStatsTool(annotations=anno)
assert stats_tool.get_annotations() != stats_tool.default_annotations
assert stats_tool.get_annotations() != anno
anno.update(stats_tool.default_annotations)
assert stats_tool.get_annotations() == anno
@patch('pontoon.tags.utils.TagsStatsTool.get_data')
def test_util_tags_stats_tool_data(data_mock):
# tests coalescing and caching of data
stats_tool = TagsStatsTool()
data_mock.return_value = (1, 2, 3)
result = stats_tool.data
assert result == [1, 2, 3]
assert data_mock.called
data_mock.reset_mock()
data_mock.return_value = (4, 5, 6)
result = stats_tool.data
assert not data_mock.called
assert result == [1, 2, 3]
del stats_tool.__dict__["data"]
result = stats_tool.data
assert data_mock.called
assert result == [4, 5, 6]
@patch(
'pontoon.tags.utils.TagsStatsTool.data',
new_callable=PropertyMock)
def test_util_tags_stats_tool_len(data_pmock):
# tests len(stats) is taken from data
stats_tool = TagsStatsTool()
data_pmock.return_value = [7, 23]
result = len(stats_tool)
assert data_pmock.called
assert result == 2
@patch(
'pontoon.tags.utils.TagsStatsTool.data',
new_callable=PropertyMock)
def test_util_tags_stats_tool_iter(data_pmock):
# tests iter(stats) iterates the data
stats_tool = TagsStatsTool()
data_pmock.return_value = [7, 23]
result = list(stats_tool)
assert data_pmock.called
assert result == [7, 23]
def test_util_tags_stats_tool_filters():
# tests stats tool has expected filters
stats_tool = TagsStatsTool()
assert (
stats_tool.filters
== [getattr(stats_tool, "filter_%s" % f)
for f
in stats_tool.filter_methods])
@patch(
'pontoon.tags.utils.TagsStatsTool.tr_manager',
new_callable=PropertyMock)
@patch('pontoon.tags.utils.TagsStatsTool.filter_tag')
@patch('pontoon.tags.utils.TagsStatsTool.filter_projects')
@patch('pontoon.tags.utils.TagsStatsTool.filter_locales')
@patch('pontoon.tags.utils.TagsStatsTool.filter_path')
def test_util_tags_stats_tool_filtered_data(m_path, m_locales,
m_proj, m_tag, trs_mock):
# tests that all filter functions are called when filtering data
# and that each is called with the result of the previous one
stats_tool = TagsStatsTool()
m = m_tag, m_proj, m_locales, m_path
# mock trs for translated_resources.all()
_m = MagicMock()
_m.all.return_value = 0
trs_mock.return_value = _m
for i, _m in enumerate(m):
if i >= len(m) - 1:
_m.return_value = 23
else:
_m.return_value = i
# get the filtered_data
result = stats_tool.filtered_data
assert result == 23
for i, _m in enumerate(m):
assert _m.called
if i > 0:
assert _m.call_args[0][0] == i - 1
@pytest.mark.django_db
def test_util_tags_stats_tool_get_data_empty(calculate_tags, assert_tags):
# tests that the stats tool and the test calculation don't break if there is no data
stats_tool = TagsStatsTool()
data = stats_tool.get_data()
assert isinstance(data, QuerySet)
assert list(data) == []
assert_tags(
calculate_tags(),
data)
@pytest.mark.django_db
def test_util_tags_stats_tool_get_data_matrix(tag_matrix, calculate_tags,
assert_tags, tag_test_kwargs):
# for different parametrized kwargs, tests that the calculated stat data
# matches expectations from long-hand calculation
name, kwargs = tag_test_kwargs
stats_tool = TagsStatsTool(**kwargs)
data = stats_tool.get_data()
assert isinstance(data, QuerySet)
_tags = calculate_tags(**kwargs)
assert_tags(_tags, data)
if "exact" in name:
assert len(data) == 1
if "glob" in name:
assert len(data) > 1
assert len(data) < len(tag_matrix['tags'])
if "no_match" in name:
assert len(data) == 0
elif "match" in name:
assert len(data) > 0
if kwargs.get("slug"):
for result in data:
assert fnmatch.fnmatch(result['slug'], kwargs["slug"])
@pytest.mark.django_db
def test_util_tags_stats_tool_groupby_locale(tag_matrix,
calculate_tags,
assert_tags, tag_test_kwargs):
name, kwargs = tag_test_kwargs
# this is only used with slug set to a unique slug, and doesn't work
# correctly without one
if name in ['slug_glob', 'party_glob'] or not kwargs.get('slug'):
kwargs['slug'] = tag_matrix['tags'][0].slug
stats_tool = TagsStatsTool(
groupby="locale",
**kwargs)
data = stats_tool.get_data()
# assert isinstance(data, QuerySet)
exp = calculate_tags(groupby='locale', **kwargs)
data = stats_tool.coalesce(data)
assert len(data) == len(exp)
for locale in data:
locale_exp = exp[locale['locale']]
assert locale_exp['total_strings'] == locale['total_strings']
assert locale_exp['fuzzy_strings'] == locale['fuzzy_strings']
assert locale_exp['approved_strings'] == locale['approved_strings']


@@ -0,0 +1,142 @@
import pytest
from mock import MagicMock, patch
from pontoon.base.models import Translation
from pontoon.tags.utils import TagsLatestTranslationsTool
def test_util_tags_translation_tool(tag_data_init_kwargs):
# tests instantiation of translations tool
kwargs = tag_data_init_kwargs
tr_tool = TagsLatestTranslationsTool(**kwargs)
for k, v in kwargs.items():
assert getattr(tr_tool, k) == v
@pytest.mark.django_db
def test_util_tags_translation_tool_get_data(tag_matrix,
calculate_tags_latest,
tag_test_kwargs):
# for different parametrized kwargs, tests that the calculated
# latest data matches expectations from long-hand calculation
name, kwargs = tag_test_kwargs
# calculate expectations
exp = calculate_tags_latest(**kwargs)
# get the data, and coalesce to translations dictionary
tr_tool = TagsLatestTranslationsTool(**kwargs)
data = tr_tool.coalesce(tr_tool.get_data())
# get a pk dictionary of all translations
translations = Translation.objects.select_related('user').in_bulk()
assert len(data) == len(exp)
for k, (pk, date) in exp.items():
assert data[k]['date'] == date
assert data[k]['string'] == translations.get(pk).string
if "exact" in name:
assert len(data) == 1
if "glob" in name:
assert len(data) > 1
assert len(data) < len(tag_matrix['tags'])
if "no_match" in name:
assert len(data) == 0
elif "match" in name:
assert len(data) > 0
@patch('pontoon.tags.utils.TagsLatestTranslationsTool.get_data')
def test_util_tags_translation_tool_data(data_mock):
# ensures latest translation data is coalesced and cached
# correctly
tr_tool = TagsLatestTranslationsTool()
# set up mock return for get_data that can be used like
# qs.iterator()
data_m = [
dict(entity__resource__tag='foo'),
dict(entity__resource__tag='bar')]
data_m2 = [dict(entity__resource__tag='baz')]
iterator_m = MagicMock()
iterator_m.iterator.return_value = data_m
data_mock.return_value = iterator_m
# get data from the tool
result = tr_tool.data
# we got back data from data_m coalesced to a dictionary
# with the groupby fields as keys
assert result == dict(foo=data_m[0], bar=data_m[1])
assert iterator_m.iterator.called
# let's reset the mock and change the return value
iterator_m.reset_mock()
iterator_m.iterator.return_value = data_m2
# and get the data again
result = tr_tool.data
# which was cached, so nothing changed
assert not iterator_m.iterator.called
assert result == dict(foo=data_m[0], bar=data_m[1])
# after deleting the cache...
del tr_tool.__dict__["data"]
# ...we get the new value
result = tr_tool.data
assert iterator_m.iterator.called
assert result == dict(baz=data_m2[0])
@pytest.mark.django_db
def test_util_tags_translation_tool_groupby(tag_matrix,
tag_test_kwargs,
calculate_tags_latest,
user0, user1):
name, kwargs = tag_test_kwargs
# the translation fixtures have no users
# - set the first third to user0, and the second third to user1
total = Translation.objects.count()
first_third_users = Translation.objects.all()[
: total // 3].values_list('pk')
second_third_users = Translation.objects.all()[
total // 3: 2 * total // 3].values_list('pk')
Translation.objects.filter(pk__in=first_third_users).update(user=user0)
Translation.objects.filter(pk__in=second_third_users).update(user=user1)
# calculate expectations grouped by locale
exp = calculate_tags_latest(groupby='locale', **kwargs)
# calculate data from tool grouped by locale
tr_tool = TagsLatestTranslationsTool(
groupby='locale',
**kwargs)
data = tr_tool.coalesce(tr_tool.get_data())
# get a pk dictionary of all translations
translations = Translation.objects.select_related('user').in_bulk()
assert len(data) == len(exp)
for k, (pk, date) in exp.items():
# check all of the expected values are correct for the
# translation and user
translation = translations.get(pk)
assert data[k]['date'] == date
assert data[k]['string'] == translation.string
assert (
data[k]['approved_date']
== translation.approved_date)
user = translation.user
if user:
assert data[k]['user__email'] == user.email
assert data[k]['user__first_name'] == user.first_name
else:
assert data[k]['user__email'] is None
assert data[k]['user__first_name'] is None


@@ -55,7 +55,7 @@ def sorted_project_locale0_translators(
}
for u in user_factory(batch=3)
]
sorted_translators = sorted(
translators,
key=lambda u: u['email']