- Add TagsTranslationsTool
- Add TagsStatsTool
- Add base classes for TagsTool utils
- Add tests for tags data utils
- Adjust/add fixtures for tags_tool tests
- Add index_together on Translation date, locale
Ryan Northey 2018-02-08 06:45:43 +00:00
Parent 3f6d58dacb
Commit eb4fd72aa4
11 changed files with 778 additions and 9 deletions


@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.8 on 2018-02-09 22:52
from __future__ import unicode_literals
from django.conf import settings
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('base', '0114_project_path_unique'),
]
operations = [
migrations.AlterIndexTogether(
name='translation',
index_together=set([('entity', 'locale', 'approved'), ('entity', 'user'), ('entity', 'locale', 'fuzzy'), ('date', 'locale')]),
),
]


@@ -2383,7 +2383,8 @@ class Translation(DirtyFieldsMixin, models.Model):
             ('entity', 'user'),
             ('entity', 'locale', 'approved'),
             ('entity', 'locale', 'fuzzy'),
-            ('locale', 'user', 'entity'))
+            ('locale', 'user', 'entity'),
+            ('date', 'locale'))

     @classmethod
     def for_locale_project_paths(self, locale, project, paths):

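The new ('date', 'locale') entry in index_together serves lookups that constrain both columns at once. A minimal sketch of that access pattern, assuming a configured Django environment; the helper name is invented, but the filter mirrors the date/locale lookup used by TagsLatestTranslationsTool.get_data() later in this commit:

from pontoon.base.models import Translation

def translations_changed_at(locale, change_date):
    # hypothetical helper, not part of the commit: an exact
    # (date, locale) filter is what the composite index covers
    return Translation.objects.filter(
        date=change_date,
        locale=locale,
    ).values('string', 'date', 'user__email')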

@@ -0,0 +1,8 @@
from .stats import TagsStatsTool
from .translations import TagsLatestTranslationsTool
__all__ = (
'TagsStatsTool',
'TagsLatestTranslationsTool')

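For orientation, a hedged sketch of how the two exported tools are used; the keyword arguments come from the clone_kwargs declared in pontoon/tags/utils/base.py below, and the slug value here is invented:

from pontoon.tags.utils import TagsLatestTranslationsTool, TagsStatsTool

# slug accepts a glob, which the tools convert with glob_to_regex
stats = TagsStatsTool(slug='browser*')
latest = TagsLatestTranslationsTool(slug='browser*')

# iterating a stats tool evaluates and caches its aggregated data
for row in stats:
    print(row['slug'], row['approved_strings'], row['total_strings'])

# the translations tool coalesces to a dict keyed by tag (or locale) pk
last_changes = latest.data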
pontoon/tags/utils/base.py (new file, 203 lines)

@@ -0,0 +1,203 @@
from collections import OrderedDict
from django.db.models import Q
from django.utils.functional import cached_property
from pontoon.base.models import (
Locale, Project, Resource, TranslatedResource, Translation)
from pontoon.base.utils import glob_to_regex
from pontoon.tags.models import Tag
class Clonable(object):
"""Instantiated descendants of this class can be called to create a cloned
version of the object.
The clone will be called with attributes listed in `self.clone_kwargs` as
kwargs. These can be overridden when creating the clone.
"""
clone_kwargs = ()
def __init__(self, **kwargs):
for k in self.clone_kwargs:
setattr(self, k, kwargs.get(k))
def __call__(self, **kwargs):
clone_kwargs = dict(
(k, getattr(self, k))
for k
in self.clone_kwargs)
clone_kwargs.update(kwargs)
return self.__class__(**clone_kwargs)
class FilteredDataTool(Clonable):
"""Base Tool for constructing and coalescing aggregating querysets
Descendants of this class will filter a queryset by mapping
self.filter_methods to methods on the class
The data is aggregated and then cached/coalesced to the
data property
It can be cloned to override filtering params
"""
default_groupby = ()
_default_annotations = ()
def __iter__(self):
return iter(self.data)
def __len__(self):
return len(self.data)
@property
def data_manager(self):
"""Entry table through which the query is constructed"""
raise NotImplementedError()
@property
def default_annotations(self):
return OrderedDict(self._default_annotations)
@property
def filtered_data(self):
"""Queryset after applying filter methods"""
data = self.data_manager.all()
for tag_filter in self.filters:
data = tag_filter(data)
return data
@property
def filters(self):
return [
getattr(self, 'filter_%s' % f)
for f
in self.filter_methods]
@cached_property
def data(self):
"""Cached and coalesed copy from get_data result"""
return self.coalesce(self.get_data())
def coalesce(self, data):
"""Coalesce the queryset to python data"""
return data
def get_annotations(self):
"""Fields to aggregate"""
anno = self.default_annotations.copy()
anno.update(self.annotations or {})
return anno
def get_data(self):
"""Get the aggregated queryset"""
return self.filtered_data.values(
*self.get_groupby()).annotate(
**self.get_annotations())
def get_groupby(self):
"""Get groupby fields"""
return [self.groupby] if self.groupby else self.default_groupby
class TagsDataTool(FilteredDataTool):
"""Base Data Tool for retrieving Tag data
This class has the various Pontoon object managers as properties, which
allows the managers to be overridden (theoretically) in a descendant class
"""
_default_annotations = ()
default_groupby = ('resource__tag', )
filter_methods = (
'tag', 'locales', 'projects')
clone_kwargs = (
'locales',
'projects',
'priority',
'slug',
'path')
@property
def locale_manager(self):
return Locale.objects
@property
def project_manager(self):
return Project.objects
@property
def resource_manager(self):
return Resource.objects
@property
def tag_manager(self):
return Tag.objects
@property
def translation_manager(self):
return Translation.objects
@property
def tr_manager(self):
return TranslatedResource.objects
class TagsTRTool(TagsDataTool):
"""Data Tool from the perspective of TranslatedResources
"""
clone_kwargs = TagsDataTool.clone_kwargs + ('annotations', 'groupby')
@property
def data_manager(self):
return self.tr_manager
def filter_locales(self, trs):
return (
trs.filter(locale__in=self.locales)
if self.locales
else trs)
def filter_path(self, trs):
return (
trs.filter(
resource__path__regex=glob_to_regex(self.path)).distinct()
if self.path
else trs)
def filter_projects(self, trs):
return (
trs.filter(resource__project__in=self.projects)
if self.projects
else trs)
def filter_tag(self, trs):
"""Filters on tag.slug and tag.priority
"""
q = Q()
if not self.slug:
# if slug is not specified, then just remove all resources
# that have no tag
q &= ~Q(resource__tag__isnull=True)
if self.slug:
q &= Q(resource__tag__slug__regex=glob_to_regex(self.slug))
if self.priority is not None:
if self.priority is False:
# if priority is False, exclude tags with priority
q &= Q(resource__tag__priority__isnull=True)
elif self.priority is True:
# if priority is True show only tags with priority
q &= Q(resource__tag__priority__isnull=False)
elif isinstance(self.priority, int):
# if priority is an int, filter on that priority
q &= Q(resource__tag__priority=self.priority)
return trs.filter(q)

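A short illustration of the cloning behaviour described in the Clonable docstring above, and of the priority semantics implemented in filter_tag, using one of the concrete tools from this commit; the slug and groupby values are invented:

from pontoon.tags.utils import TagsStatsTool

# attributes named in clone_kwargs are copied onto the clone unless
# overridden in the call, so a configured tool can be re-scoped cheaply
stats = TagsStatsTool(slug='browser*')
by_locale = stats(groupby='locale')

assert by_locale.slug == stats.slug == 'browser*'
assert by_locale.groupby == 'locale'
assert stats.groupby is None

# priority=True keeps only tags with a priority set, priority=False
# keeps only tags without one, and an int matches that priority exactly
prioritised = stats(priority=True)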

@@ -0,0 +1,68 @@
from django.db.models import F, Sum, Value
from django.db.models.functions import Coalesce
from .base import TagsTRTool
class TagsStatsTool(TagsTRTool):
"""Creates aggregated stat data for tags according to
filters
"""
coalesce = list
filter_methods = (
'tag', 'projects', 'locales', 'path')
# from the perspective of translated resources
_default_annotations = (
('total_strings', Coalesce(
Sum('resource__total_strings'),
Value(0))),
('fuzzy_strings', Coalesce(
Sum('fuzzy_strings'),
Value(0))),
('approved_strings', Coalesce(
Sum('approved_strings'),
Value(0))),
('translated_strings', Coalesce(
Sum('translated_strings'),
Value(0))))
def get_data(self):
"""Stats can be generated either grouping by tag or by locale
Once the tags/locales are found a second query is made to get
their data
"""
if self.get_groupby()[0] == 'resource__tag':
stats = {
stat['resource__tag']: stat
for stat
in super(TagsStatsTool, self).get_data()}
# get the found tags as values
tags = self.tag_manager.filter(pk__in=stats.keys())
tags = tags.values('pk', 'slug', 'name', 'priority', 'project')
tags = tags.annotate(resource__tag=F('pk'))
for tag in tags:
# update the stats with tag data
tag.update(stats[tag['pk']])
return tags
elif self.get_groupby()[0] == 'locale':
result = list(super(TagsStatsTool, self).get_data())
# get the found locales as values
locales = {
loc['pk']: loc
for loc
in self.locale_manager.filter(
pk__in=(
r['locale']
for r
in result)).values('pk', 'name', 'code')}
for r in result:
# update the stats with locale data
r.update(locales[r['locale']])
return sorted(result, key=lambda r: r['name'])

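Roughly what the coalesced stats look like for the two groupings described in get_data above, assuming a configured Django environment; the field names follow the values()/annotate() calls, while the numbers and slugs are invented:

from pontoon.tags.utils import TagsStatsTool

# grouped by tag (the default): one dict per matching Tag
tag_rows = TagsStatsTool(slug='browser*').data
# [{'pk': 1, 'slug': 'browser-ui', 'name': 'Browser UI', 'priority': 2,
#   'project': None, 'resource__tag': 1, 'total_strings': 120,
#   'approved_strings': 80, 'translated_strings': 10, 'fuzzy_strings': 5}]

# grouped by locale: one dict per Locale, sorted by locale name
locale_rows = TagsStatsTool(slug='browser-ui', groupby='locale').data
# [{'pk': 7, 'name': 'Czech', 'code': 'cs', 'locale': 7,
#   'total_strings': 40, 'approved_strings': 30,
#   'translated_strings': 4, 'fuzzy_strings': 1}]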

@@ -0,0 +1,51 @@
from django.db.models import Max, Q
from .base import TagsTRTool
class TagsLatestTranslationsTool(TagsTRTool):
"""For given filters this tool will find the latest ``Translations``
for a ``Tag``. It uses TranslatedResources to find the translations
but returns translations.
"""
filter_methods = ('tag', 'projects', 'latest', 'locales', 'path')
_default_annotations = (
('last_change', Max('latest_translation__date')), )
@property
def groupby_prefix(self):
# the latest translations are found via translated_resources and then
# used to retrieve the translations themselves, so the groupby field
# needs to be mapped to the Translation-side lookup here
groupby = list(self.get_groupby())
if groupby == ['resource__tag']:
return "entity__resource__tag"
elif groupby == ['locale']:
return "locale"
def coalesce(self, data):
return {
translation[self.groupby_prefix]: translation
for translation
in data.iterator()}
def get_data(self):
_translations = self.translation_manager.none()
stats = super(TagsLatestTranslationsTool, self).get_data()
for tr in stats.iterator():
# find translations with matching date and tag/locale
_translations |= self.translation_manager.filter(
Q(**{'date': tr["last_change"],
self.groupby_prefix: tr[self.get_groupby()[0]]}))
return _translations.values(
*('string',
'date',
'approved_date',
'user__first_name',
'user__email')
+ (self.groupby_prefix, ))
def filter_latest(self, qs):
return qs.exclude(latest_translation__isnull=True)

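And a sketch of the shape the latest-translations tool returns: coalesce keys each Translation's values() dict by the mapped groupby field (tag pk by default, locale pk when groupby='locale'); the keys and values below are invented:

from pontoon.tags.utils import TagsLatestTranslationsTool

last_changes = TagsLatestTranslationsTool(slug='browser*').data
# {1: {'string': 'Some translation', 'date': <datetime>,
#      'approved_date': None, 'user__first_name': 'Translator',
#      'user__email': 'translator@example.com',
#      'entity__resource__tag': 1},
#  2: {...}}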
tests/fixtures/tags.py (vendored, 49 lines changed)

@@ -155,13 +155,17 @@ def _calculate_tags(**kwargs):
     attrs = ['total_strings'] + attrs
     # iterate through associated tags for all matching translated resources
     for tr, (_pk, _slug, _name) in translated_resource_tags:
-        if _slug not in totals:
+        if kwargs.get('groupby'):
+            key = tr[kwargs['groupby']]
+        else:
+            key = _slug
+        if key not in totals:
             # create a totals[tag] with zeros for this tag
-            totals[_slug] = dict((attr, 0) for attr in attrs)
-            totals[_slug].update(dict(name=_name, pk=_pk, last_change=None))
+            totals[key] = dict((attr, 0) for attr in attrs)
+            totals[key].update(dict(name=_name, pk=_pk, last_change=None))
         for attr in attrs:
             # add the total for this translated resource to the tags total
-            totals[_slug][attr] += tr[attr]
+            totals[key][attr] += tr[attr]
     return totals
@@ -194,13 +198,17 @@ def _calculate_tags_latest(**kwargs):
     translation_tags = _tag_iterator(qs, **kwargs)
     # iterate through associated tags for all matching translations
     for translation, (tag, __, __) in translation_tags:
+        if kwargs.get('groupby'):
+            key = translation[kwargs['groupby']]
+        else:
+            key = tag
         # get the current latest for this tag
         _pk, _date = latest_dates.get(
-            tag, (None, timezone.make_aware(datetime.min)))
+            key, (None, timezone.make_aware(datetime.min)))
         if translation['date'] > _date:
             # set this translation if its newer than the current latest
             # for this tag
-            latest_dates[tag] = (translation['pk'], translation['date'])
+            latest_dates[key] = (translation['pk'], translation['date'])
     return latest_dates
@@ -320,7 +328,7 @@ def tag_test_kwargs(request, tag_matrix):
     return request.param, kwargs


-_tag_init_kwargs = OrderedDict(
+_tag_data_init_kwargs = OrderedDict(
     (('no_args',
       dict(annotations=None,
            groupby=None,
@@ -339,6 +347,33 @@ _tag_init_kwargs = OrderedDict(
            slug=7))))
+
+
+@pytest.fixture(params=_tag_data_init_kwargs)
+def tag_data_init_kwargs(request):
+    """This is a parametrized fixture that provides 2 sets
+    of possible **kwargs to instantiate the TagsDataTools with
+
+    The first set of kwargs are all set to `None`, and the
+    second contains numeric values for testing against
+    """
+    return _tag_data_init_kwargs.get(request.param).copy()
+
+
+_tag_init_kwargs = OrderedDict(
+    (('no_args',
+      dict(locales=None,
+           path=None,
+           priority=None,
+           projects=None,
+           slug=None)),
+     ('args',
+      dict(locales=1,
+           path=2,
+           priority=3,
+           projects=4,
+           slug=5))))
+
+
 @pytest.fixture(params=_tag_init_kwargs)
 def tag_init_kwargs(request):
     """This is a parametrized fixture that provides 2 sets

tests/tags/utils/base.py (new file, 56 lines)

@@ -0,0 +1,56 @@
import pytest
from pontoon.base.models import (
Locale, Project, Resource, TranslatedResource, Translation)
from pontoon.tags.models import Tag
from pontoon.tags.utils.base import Clonable, TagsDataTool
def test_util_clonable():
# tests that Clonable clones
class MockClonable(Clonable):
clone_kwargs = ('foo', 'bar')
clonable = MockClonable()
assert clonable.foo is None
assert clonable.bar is None
assert clonable().foo is None
assert clonable().bar is None
assert clonable(foo=7).foo == 7
assert clonable(bar=23).bar == 23
clonable = MockClonable(foo=7)
assert clonable.foo == 7
assert clonable.bar is None
assert clonable().foo == 7
assert clonable().bar is None
assert clonable(foo=113).foo == 113
assert clonable(foo=113).bar is None
assert clonable(bar=23).bar == 23
def test_util_tags_data_tool_managers():
# tests that the data tool has expected managers
tool = TagsDataTool()
assert tool.tag_manager == Tag.objects
assert tool.locale_manager == Locale.objects
assert tool.project_manager == Project.objects
assert tool.resource_manager == Resource.objects
assert tool.tr_manager == TranslatedResource.objects
assert tool.translation_manager == Translation.objects
def test_util_tags_data_tool_instance():
# tests that base tool does not implement a data_manager
# and that the default coalesce is to return the data
tool = TagsDataTool()
with pytest.raises(NotImplementedError):
tool.data_manager
assert tool.coalesce('X') == 'X'

tests/tags/utils/stats.py (new file, 184 lines)

@@ -0,0 +1,184 @@
import fnmatch
import pytest
from mock import MagicMock, patch, PropertyMock
from django.db.models import QuerySet
from pontoon.base.models import TranslatedResource
from pontoon.tags.utils import TagsStatsTool
def test_util_tags_stats_tool(tag_data_init_kwargs):
# tests instantiation of stats tool
kwargs = tag_data_init_kwargs
stats_tool = TagsStatsTool(**kwargs)
for k, v in kwargs.items():
assert getattr(stats_tool, k) == v
assert stats_tool.tr_manager == TranslatedResource.objects
def test_util_tags_stats_tool_annotations():
# tests annotations can be overridden
stats_tool = TagsStatsTool()
assert stats_tool.get_annotations() == stats_tool.default_annotations
anno = dict(foo="foo0", bar="bar0")
stats_tool = TagsStatsTool(annotations=anno)
assert stats_tool.get_annotations() != stats_tool.default_annotations
assert stats_tool.get_annotations() != anno
anno.update(stats_tool.default_annotations)
assert stats_tool.get_annotations() == anno
@patch('pontoon.tags.utils.TagsStatsTool.get_data')
def test_util_tags_stats_tool_data(data_mock):
# tests coalescing and caching of data
stats_tool = TagsStatsTool()
data_mock.return_value = (1, 2, 3)
result = stats_tool.data
assert result == [1, 2, 3]
assert data_mock.called
data_mock.reset_mock()
data_mock.return_value = (4, 5, 6)
result = stats_tool.data
assert not data_mock.called
assert result == [1, 2, 3]
del stats_tool.__dict__["data"]
result = stats_tool.data
assert data_mock.called
assert result == [4, 5, 6]
@patch(
'pontoon.tags.utils.TagsStatsTool.data',
new_callable=PropertyMock)
def test_util_tags_stats_tool_len(data_pmock):
# tests len(stats) is taken from data
stats_tool = TagsStatsTool()
data_pmock.return_value = [7, 23]
result = len(stats_tool)
assert data_pmock.called
assert result == 2
@patch(
'pontoon.tags.utils.TagsStatsTool.data',
new_callable=PropertyMock)
def test_util_tags_stats_tool_iter(data_pmock):
# tests iter(stats) iterates the data
stats_tool = TagsStatsTool()
data_pmock.return_value = [7, 23]
result = list(stats_tool)
assert data_pmock.called
assert result == [7, 23]
def test_util_tags_stats_tool_filters():
# tests stats tool has expected filters
stats_tool = TagsStatsTool()
assert (
stats_tool.filters
== [getattr(stats_tool, "filter_%s" % f)
for f
in stats_tool.filter_methods])
@patch(
'pontoon.tags.utils.TagsStatsTool.tr_manager',
new_callable=PropertyMock)
@patch('pontoon.tags.utils.TagsStatsTool.filter_tag')
@patch('pontoon.tags.utils.TagsStatsTool.filter_projects')
@patch('pontoon.tags.utils.TagsStatsTool.filter_locales')
@patch('pontoon.tags.utils.TagsStatsTool.filter_path')
def test_util_tags_stats_tool_filtered_data(m_path, m_locales,
m_proj, m_tag, trs_mock):
# tests that all filter functions are called when filtering data
# and that each is called with the result of the previous one
stats_tool = TagsStatsTool()
m = m_tag, m_proj, m_locales, m_path
# mock trs for translated_resources.all()
_m = MagicMock()
_m.all.return_value = 0
trs_mock.return_value = _m
for i, _m in enumerate(m):
if i >= len(m) - 1:
_m.return_value = 23
else:
_m.return_value = i
# get the filtered_data
result = stats_tool.filtered_data
assert result == 23
for i, _m in enumerate(m):
assert _m.called
if i > 0:
assert _m.call_args[0][0] == i - 1
@pytest.mark.django_db
def test_util_tags_stats_tool_get_data_empty(calculate_tags, assert_tags):
# tests that the stats tool and the test calculation don't break if there is no data
stats_tool = TagsStatsTool()
data = stats_tool.get_data()
assert isinstance(data, QuerySet)
assert list(data) == []
assert_tags(
calculate_tags(),
data)
@pytest.mark.django_db
def test_util_tags_stats_tool_get_data_matrix(tag_matrix, calculate_tags,
assert_tags, tag_test_kwargs):
# for different parametrized kwargs, tests that the calculated stat data
# matches expectations from long-hand calculation
name, kwargs = tag_test_kwargs
stats_tool = TagsStatsTool(**kwargs)
data = stats_tool.get_data()
assert isinstance(data, QuerySet)
_tags = calculate_tags(**kwargs)
assert_tags(_tags, data)
if "exact" in name:
assert len(data) == 1
if "glob" in name:
assert len(data) > 1
assert len(data) < len(tag_matrix['tags'])
if "no_match" in name:
assert len(data) == 0
elif "match" in name:
assert len(data) > 0
if kwargs.get("slug"):
for result in data:
assert fnmatch.fnmatch(result['slug'], kwargs["slug"])
@pytest.mark.django_db
def test_util_tags_stats_tool_groupby_locale(tag_matrix,
calculate_tags,
assert_tags, tag_test_kwargs):
name, kwargs = tag_test_kwargs
# this is only used with slug set to a unique slug, and doesn't work
# correctly without one
if name in ['slug_glob', 'party_glob'] or not kwargs.get('slug'):
kwargs['slug'] = tag_matrix['tags'][0].slug
stats_tool = TagsStatsTool(
groupby="locale",
**kwargs)
data = stats_tool.get_data()
# assert isinstance(data, QuerySet)
exp = calculate_tags(groupby='locale', **kwargs)
data = stats_tool.coalesce(data)
assert len(data) == len(exp)
for locale in data:
locale_exp = exp[locale['locale']]
assert locale_exp['total_strings'] == locale['total_strings']
assert locale_exp['fuzzy_strings'] == locale['fuzzy_strings']
assert locale_exp['approved_strings'] == locale['approved_strings']


@@ -0,0 +1,142 @@
import pytest
from mock import MagicMock, patch
from pontoon.base.models import Translation
from pontoon.tags.utils import TagsLatestTranslationsTool
def test_util_tags_translation_tool(tag_data_init_kwargs):
# tests instantiation of translations tool
kwargs = tag_data_init_kwargs
tr_tool = TagsLatestTranslationsTool(**kwargs)
for k, v in kwargs.items():
assert getattr(tr_tool, k) == v
@pytest.mark.django_db
def test_util_tags_translation_tool_get_data(tag_matrix,
calculate_tags_latest,
tag_test_kwargs):
# for different parametrized kwargs, tests that the calculated
# latest data matches expectations from long-hand calculation
name, kwargs = tag_test_kwargs
# calculate expectations
exp = calculate_tags_latest(**kwargs)
# get the data, and coalesce to translations dictionary
tr_tool = TagsLatestTranslationsTool(**kwargs)
data = tr_tool.coalesce(tr_tool.get_data())
# get a pk dictionary of all translations
translations = Translation.objects.select_related('user').in_bulk()
assert len(data) == len(exp)
for k, (pk, date) in exp.items():
assert data[k]['date'] == date
assert data[k]['string'] == translations.get(pk).string
if "exact" in name:
assert len(data) == 1
if "glob" in name:
assert len(data) > 1
assert len(data) < len(tag_matrix['tags'])
if "no_match" in name:
assert len(data) == 0
elif "match" in name:
assert len(data) > 0
@patch('pontoon.tags.utils.TagsLatestTranslationsTool.get_data')
def test_util_tags_translation_tool_data(data_mock):
# ensures latest translation data is coalesced and cached
# correctly
tr_tool = TagsLatestTranslationsTool()
# set up mock return for get_data that can be used like
# qs.iterator()
data_m = [
dict(entity__resource__tag='foo'),
dict(entity__resource__tag='bar')]
data_m2 = [dict(entity__resource__tag='baz')]
iterator_m = MagicMock()
iterator_m.iterator.return_value = data_m
data_mock.return_value = iterator_m
# get data from the tool
result = tr_tool.data
# we got back data from data_m coalesced to a dictionary
# with the groupby fields as keys
assert result == dict(foo=data_m[0], bar=data_m[1])
assert iterator_m.iterator.called
# let's reset the mock and change the return value
iterator_m.reset_mock()
iterator_m.iterator.return_value = data_m2
# and get the data again
result = tr_tool.data
# which was cached, so nothing changed
assert not iterator_m.iterator.called
assert result == dict(foo=data_m[0], bar=data_m[1])
# after deleting the cache...
del tr_tool.__dict__["data"]
# ...we get the new value
result = tr_tool.data
assert iterator_m.iterator.called
assert result == dict(baz=data_m2[0])
@pytest.mark.django_db
def test_util_tags_translation_tool_groupby(tag_matrix,
tag_test_kwargs,
calculate_tags_latest,
user0, user1):
name, kwargs = tag_test_kwargs
# the translation fixtures have no users
# - set the first third to user0, and the second third to user1
total = Translation.objects.count()
first_third_users = Translation.objects.all()[
: total // 3].values_list('pk')
second_third_users = Translation.objects.all()[
total // 3: 2 * total // 3].values_list('pk')
Translation.objects.filter(pk__in=first_third_users).update(user=user0)
Translation.objects.filter(pk__in=second_third_users).update(user=user1)
# calculate expectations grouped by locale
exp = calculate_tags_latest(groupby='locale', **kwargs)
# calculate data from tool grouped by locale
tr_tool = TagsLatestTranslationsTool(
groupby='locale',
**kwargs)
data = tr_tool.coalesce(tr_tool.get_data())
# get a pk dictionary of all translations
translations = Translation.objects.select_related('user').in_bulk()
assert len(data) == len(exp)
for k, (pk, date) in exp.items():
# check all of the expected values are correct for the
# translation and user
translation = translations.get(pk)
assert data[k]['date'] == date
assert data[k]['string'] == translation.string
assert (
data[k]['approved_date']
== translation.approved_date)
user = translation.user
if user:
assert data[k]['user__email'] == user.email
assert data[k]['user__first_name'] == user.first_name
else:
assert data[k]['user__email'] is None
assert data[k]['user__first_name'] is None


@@ -55,7 +55,7 @@ def sorted_project_locale0_translators(
}
for u in user_factory(batch=3)
]
sorted_translators = sorted(
translators,
key=lambda u: u['email']