Fix S's values_list/dict re: ES 1.0

This makes some pretty big changes. 1. .values_list() and .values_dict() will now **always** send the "fields" property to ES. If you don't specify any field arguments, then it sends "fields": ["*"]. If you do specify field arguments, then it sends that list of fields. 2. Elasticsearch 1.0 changed the shape of what gets returned--all values are lists when you specify the "fields" property. To make Elasticsearch 0.90 and Elasticsearch 1.0 consistent, we now convert any non-list values into list values for Elasticsearch 0.90. What does that mean in practice? Prior to this commit, you'd have the following: # Elasticsearch 0.90 >>> list(S().values_list()) [(501, 'frank'), (602, 'sam')] # values are ints and strings >>> list(S().values_list('id', 'name')) [(501, 'frank'), (602, 'sam')] # values are ints and strings # Elasticsearch 1.0 >>> list(S().values_list()) [(501, 'frank'), (602, 'sam')] # values are ints and strings >>> list(S().values_list('id', 'name')) [([501], ['frank']), ([602], ['sam'])] # values are all lists After this change, we have this (assuming id and name fields are stored): # Elasticsearch 0.90 >>> list(S().values_list()) [([501], ['frank']), ([602], ['sam'])] >>> list(S().values_list('id', 'name')) [([501], ['frank']), ([602], ['sam'])] # Elasticsearch 1.0 >>> list(S().values_list()) [([501], ['frank']), ([602], ['sam'])] >>> list(S().values_list('id', 'name')) [([501], ['frank']), ([602], ['sam'])] If you plan to call .values_list() and .values_dict() with no arguments, you must add "store: True" to your mappings for the fields you want to get back.
2014-08-13 15:08:03 -04:00 · 2014-08-13 15:08:03 -04:00 · 03f4094e51
--- a/28
+++ b/28
@ -11,6 +11,34 @@ Version 0.10: In development

 **API-breaking changes:**

+* **big ``.values_list()`` and ``.values_dict()`` changes**
+
+  ``.values_list()`` and ``.values_dict()`` will now **always** specify
+  the Elasticsearch ``fields`` property.
+
+  If you call these two functions with no arguments (i.e. you specify
+  no fields), they will send ``fields=*`` to Elasticsearch, which
+  returns any fields marked as stored in the document mapping. If you
+  have no fields marked as stored, then you get back only the ``_id``
+  and ``_type`` of each result.
+
+  If you call these two functions with arguments (i.e. you specify
+  fields), then they return those fields---same as before.
+
+  However, they now return all values as lists. For example::
+
+      >>> S().values_list()
+      [([100], ['bob'], [40]), ...]
+
+      >>> S().values_list('id')
+      [([100],), ([101],), ...]
+
+      >>> S().values_dict()
+      [{'id': [100], 'name': ['bob'], 'weight': [40]}, ...]
+
+      >>> S().values_dict('id', 'name')
+      [{'id': [100], 'name': ['bob']}, ...]
+
 * Removed ``text`` and ``text_phrase`` queries. They're renamed
  in Elasticsearch to ``match`` and ``match_phrase``.

--- a/docs/samples/sample_facets.py
+++ b/docs/samples/sample_facets.py
@ -98,7 +98,7 @@ print s.facet_counts()
 #     }}

 # Let's do a query for 'cookie' and do a facet count.
-print s.query(title__text='cookie').facet_counts()
+print s.query(title__match='cookie').facet_counts()
 # Pretty-printed output:
 # {u'product': {
 #     u'_type': u'terms',
--- a/docs/samples/sample_quickstart.py
+++ b/docs/samples/sample_quickstart.py
@ -79,7 +79,7 @@ print basic_s.count()

 # Print articles with 'cookie' in the title.
 print [item['title']
-       for item in basic_s.query(title__text='cookie')]
+       for item in basic_s.query(title__match='cookie')]
 # Prints:
 # [u'Deleting cookies', u'What is a cookie?',
 #  u'Websites say cookies are blocked - Unblock them']
@ -87,7 +87,7 @@ print [item['title']
 # Print articles with 'cookie' in the title that are related to
 # websites.
 print [item['title']
-       for item in basic_s.query(title__text='cookie')
+       for item in basic_s.query(title__match='cookie')
                          .filter(topics='websites')]
 # Prints:
 # [u'Websites say cookies are blocked - Unblock them']
@ -100,7 +100,7 @@ print [item['title']

 # Do a query and use the highlighter to denote the matching text.
 print [(item['title'], item.es_meta.highlight['title'])
-       for item in basic_s.query(title__text='cookie').highlight('title')]
+       for item in basic_s.query(title__match='cookie').highlight('title')]
 # Prints:
 # [
 #    (u'Deleting cookies', [u'Deleting <em>cookies</em>']),
--- a/elasticutils/init.py
+++ b/elasticutils/init.py
@ -1,7 +1,6 @@
 import copy
 import logging
 from datetime import datetime
-from operator import itemgetter

 import six
 from six import string_types
@ -601,53 +600,61 @@ class S(PythonMixin):
        return self._clone(next_step=('explain', value))

    def values_list(self, *fields):
-        """
-        Return a new S instance that returns ListSearchResults.
+        """Return a new S instance that returns ListSearchResults.

        :arg fields: the list of fields to have in the results.

-            With no arguments, returns a list of tuples of all the
-            data for that document.
+            With no arguments, passes ``fields=*`` and returns values
+            for any fields you have marked as ``store: True`` in that
+            mapping.

-            With arguments, returns a list of tuples where the fields
-            in the tuple are in the order specified.
+            With arguments, passes those field arguments via
+            ``fields`` and returns a list of tuples with values in the
+            order specified.

-        For example:
+        For example (assume id, name and age are stored fields):

        >>> list(S().values_list())
-        [(1, 'fred', 40), (2, 'brian', 30), (3, 'james', 45)]
+        [([1], ['fred'], [40]), ([2], ['brian'], [30]), ...]
        >>> list(S().values_list('id', 'name'))
-        [(1, 'fred'), (2, 'brian'), (3, 'james')]
+        [([1], ['fred']), ([2], ['brian']), ([3], ['james'])]
        >>> list(S().values_list('name', 'id'))
-        [('fred', 1), ('brian', 2), ('james', 3)]
+        [(['fred'], [1]), (['brian'], [2]), (['james'], [3])]

        .. Note::

-           If you don't specify fields, the data comes back in an
-           arbitrary order. It's probably best to specify fields or
-           use ``values_dict``.
+            If you do not specify any fields and you have no fields
+            marked as stored, then you will get back the ``_id`` and
+            ``_type`` of each result and that's it.

        """
        return self._clone(next_step=('values_list', fields))

    def values_dict(self, *fields):
-        """
-        Return a new S instance that returns DictSearchResults.
+        """Return a new S instance that returns DictSearchResults.

        :arg fields: the list of fields to have in the results.

-            With no arguments, this returns a list of dicts with all
-            the fields.
+            With no arguments, passes ``fields=*`` and returns values
+            for any fields you have marked as ``store: True`` in that
+            mapping.

-            With arguments, it returns a list of dicts with the
-            specified fields.
+            With arguments, passes those field arguments via
+            ``fields`` and returns a list of dicts with the specified
+            fields.

-        For example:
+        For example (assuming id, name and age are stored):

        >>> list(S().values_dict())
-        [{'id': 1, 'name': 'fred', 'age': 40}, ...]
+        [{'id': [1], 'name': ['fred'], 'age': [40]}, ...]
        >>> list(S().values_dict('id', 'name'))
-        [{'id': 1, 'name': 'fred'}, ...]
+        [{'id': [1], 'name': ['fred']}, ...]
+
+        .. Note::
+
+            If you do not specify any fields and you have no fields
+            marked as stored, then you will get back the ``_id`` and
+            ``_type`` of each result and that's it.

        """
        return self._clone(next_step=('values_dict', fields))
@ -1173,10 +1180,10 @@ class S(PythonMixin):
            elif pq:
                qs['query'] = pq

-        if as_list and list_fields:
-            fields = qs['fields'] = list(list_fields)
-        elif as_dict and dict_fields:
-            fields = qs['fields'] = list(dict_fields)
+        if as_list:
+            fields = qs['fields'] = list(list_fields) if list_fields else ['*']
+        elif as_dict:
+            fields = qs['fields'] = list(dict_fields) if dict_fields else ['*']
        else:
            fields = set()

@ -1816,10 +1823,29 @@ class DictSearchResults(SearchResults):
    dict.
    """
    def set_objects(self, results):
-        key = 'fields' if self.fields else '_source'
-        self.objects = [decorate_with_metadata(DictResult(r[key]), r)
+        def listify(d):
+            return dict([(key, val if isinstance(val, list) else [val])
+                         for key, val in d.items()])
+
+        if results:
+            if 'fields' in results[0]:
+                objs = [(r['fields'], r) for r in results]
+
+            elif '_source' in results[0]:
+                objs = [(r['_source'], r) for r in results]
+
+            else:
+                # No fields and no source, so we just return _id and
+                # _type.
+                objs = [({'_id': r['_id'], '_type': r['_type']}, r)
                        for r in results]

+        else:
+            objs = []
+
+        # Decorate with metadata and listify values
+        self.objects = [decorate_with_metadata(DictResult(listify(obj)), r)
+                        for obj, r in objs]

 class ListSearchResults(SearchResults):
    """
@ -1827,19 +1853,26 @@ class ListSearchResults(SearchResults):
    tuple.
    """
    def set_objects(self, results):
-        if self.fields:
-            getter = itemgetter(*self.fields)
-            objs = [(getter(r['fields']), r) for r in results]
+        def listify(values):
+            return [(val if isinstance(val, list) else [val])
+                    for val in values]

-            # itemgetter returns an item--not a tuple of one item--if
-            # there is only one thing in self.fields. Since we want
-            # this to always return a list of tuples, we need to fix
-            # that case here.
-            if len(self.fields) == 1:
-                objs = [((obj,), r) for obj, r in objs]
+        if results:
+            if 'fields' in results[0]:
+                objs = [(r['fields'].values(), r) for r in results]
+
+            elif '_source' in results[0]:
+                objs = [(r['_source'].values(), r) for r in results]
+
+            else:
+                # No fields and no source, so we just return _id and
+                # _type.
+                objs = [((r['_id'], r['_type']), r) for r in results]
        else:
-            objs = [(r['_source'].values(), r) for r in results]
-        self.objects = [decorate_with_metadata(TupleResult(obj), r)
+            objs = []
+
+        # Decorate with metadata and listify values
+        self.objects = [decorate_with_metadata(TupleResult(listify(obj)), r)
                        for obj, r in objs]


--- a/elasticutils/estestcase.py
+++ b/elasticutils/estestcase.py
@ -53,7 +53,7 @@ class ESTestCase(TestCase):
        # Note: TestCase has no setup_class, so we don't call super()
        # here.
        cls.cleanup_index()
-        cls.create_index(settings={'mappings': cls.mapping})
+        cls.create_index(mappings=cls.mapping)
        if cls.data:
            cls.index_data(cls.data)
            cls.refresh()
@ -98,18 +98,16 @@ class ESTestCase(TestCase):
                 .doctypes(cls.mapping_type_name))

    @classmethod
-    def create_index(cls, settings=None):
+    def create_index(cls, **kwargs):
        """Creates an index with specified settings

        Uses ``cls.index_name`` as the index to create.

-        :arg settings: Any additional settings to use to create the
-            index.
+        :arg kwargs: Any additional args to put in the body like
+            "settings", "mappings", etc.

        """
-        body = {}
-        if settings:
-            body['settings'] = settings
+        body = kwargs if kwargs else {}
        cls.get_es().indices.create(index=cls.index_name, body=body)

    @classmethod
--- a/elasticutils/tests/test_query.py
+++ b/elasticutils/tests/test_query.py
@ -576,7 +576,7 @@ class QueryTest(ESTestCase):
        # Test caching of empty results
        try:
            self.teardown_class()
-            self.create_index(settings={'mappings': self.mapping})
+            self.create_index(mappings=self.mapping)
            self.refresh()

            s = self.get_s()
@ -1501,6 +1501,64 @@ class SearchTypeTest(ESTestCase):
        eq_(len(s[:1]), 2)


+class ValuesTest(ESTestCase):
+    def test_values_list_chaining(self):
+        s = self.get_s()
+        s = s.values_list()
+        eqish_(s.build_search(),
+               {
+                   'fields': ['*']
+               })
+
+        s = s.values_list('id')
+        eqish_(s.build_search(),
+               {
+                   'fields': ['id']
+               })
+
+        # Add new fields
+        s = s.values_list('name', 'title')
+        eqish_(s.build_search(),
+               {
+                   'fields': ['id', 'name', 'title']
+               })
+
+        # Fields don't show up more than once
+        s = s.values_list('id')
+        eqish_(s.build_search(),
+               {
+                   'fields': ['id', 'name', 'title']
+               })
+
+    def test_values_dict_chaining(self):
+        s = self.get_s()
+        s = s.values_dict()
+        eqish_(s.build_search(),
+               {
+                   'fields': ['*']
+               })
+
+        s = s.values_dict('id')
+        eqish_(s.build_search(),
+               {
+                   'fields': ['id']
+               })
+
+        # Add new fields
+        s = s.values_dict('name', 'title')
+        eqish_(s.build_search(),
+               {
+                   'fields': ['id', 'name', 'title']
+               })
+
+        # Fields don't show up more than once
+        s = s.values_dict('id')
+        eqish_(s.build_search(),
+               {
+                   'fields': ['id', 'name', 'title']
+               })
+
+
 class SuggestionTest(ESTestCase):
    data = [
        {'id': 1, 'name': 'bar'},
--- a/elasticutils/tests/test_results.py
+++ b/elasticutils/tests/test_results.py
@ -76,26 +76,45 @@ class TestResultsWithData(ESTestCase):
        searcher = list(self.get_s(FakeMappingType).query(foo='bar'))
        assert isinstance(searcher[0], FakeMappingType)

-    def test_values_dict_results(self):
+    def test_values_dict_no_fields(self):
        """With values_dict, return list of dicts."""
        searcher = list(self.get_s().query(foo='bar').values_dict())
        assert isinstance(searcher[0], dict)

+    def test_values_dict_results(self):
+        """With values_dict, return list of dicts."""
+        searcher = list(self.get_s()
+                        .query(foo='bar')
+                        .values_dict('foo', 'width'))
+        assert isinstance(searcher[0], dict)
+        eq_(
+            sorted(searcher[0].items()),
+            sorted([(u'foo', [u'bar']), (u'width', [u'2'])])
+        )
+
    def test_values_list_no_fields(self):
-        """Specifying no fields with values_list defaults to ['id']."""
-        searcher = list(self.get_s().query(foo='bar').values_list())
+        """Specifying no fields with values_list returns what's stored."""
+        searcher = list(self.get_s()
+                        .query(foo='bar')
+                        .values_list())
        assert isinstance(searcher[0], tuple)
        # We sort the result and expected result here so that the
        # order is stable and comparable.
        eq_(
-            sorted(searcher[0], key=str),
-            sorted((u'2', u'bar', u'awesome', 1), key=str))
+            sorted(searcher[0]),
+            sorted([[u'1'], [u'elasticutilsmappingtype']])
+        )

    def test_values_list_results(self):
        """With values_list fields, returns list of tuples."""
-        searcher = list(self.get_s().query(foo='bar')
-                                    .values_list('foo', 'width'))
+        searcher = list(self.get_s()
+                        .query(foo='bar')
+                        .values_list('foo', 'width'))
        assert isinstance(searcher[0], tuple)
+        eq_(
+            sorted(searcher[0]),
+            sorted(([u'2'], [u'bar']))
+        )

    def test_default_results_form_has_metadata(self):
        """Test default results form has metadata."""
@ -135,14 +154,126 @@ class TestResultsWithData(ESTestCase):
        eq_(S().query(fld1=2)
               .values_dict()
               .build_search(),
-            {"query": {"term": {"fld1": 2}}})
+            {"query": {"term": {"fld1": 2}}, 'fields': ['*']})

    def test_values_list_no_args(self):
-        """Calling values() with no args fetches only id."""
+        """Calling values_list() with no args sends fields=['*']."""
        eq_(S().query(fld1=2)
               .values_list()
               .build_search(),
-            {'query': {"term": {"fld1": 2}}})
+            {'query': {"term": {"fld1": 2}}, 'fields': ['*']})
+
+
+class TestResultsWithStoredFields(ESTestCase):
+    def test_values_list_no_args_no_stored_fields(self):
+        # If there are no fields specified in the values_list() call
+        # and no stored fields for that document, then we pass in
+        # fields=['*'] and ES returns nothing, so we return the _id
+        # and _type.
+        self.cleanup_index()
+        self.create_index(
+            mappings={
+                self.mapping_type_name: {
+                    'id': {'type': 'integer'},
+                    'name': {'type': 'string'},
+                    'weight': {'type': 'integer'},
+                }
+            }
+        )
+        data = [
+            {'id': 1, 'name': 'bob', 'weight': 40}
+        ]
+
+        self.index_data(data)
+        self.refresh()
+
+        results = list(self.get_s().values_list())
+        eq_(sorted(results[0], key=repr),
+            # Note: This is the _id of the document--not the "id" in
+            # the document.
+            sorted(([u'1'], [u'elasticutilsmappingtype']), key=repr)
+        )
+
+    def test_values_list_no_args_with_stored_fields(self):
+        # If there are no fields specified, then ES returns the fields
+        # marked as stored.
+        self.cleanup_index()
+        self.create_index(
+            mappings={
+                self.mapping_type_name: {
+                    'properties': {
+                        'id': {'type': 'integer', 'store': True},
+                        'name': {'type': 'string', 'store': True},
+                        'weight': {'type': 'integer'},
+                    }
+                }
+            }
+        )
+
+        data = [
+            {'id': 1, 'name': 'bob', 'weight': 40}
+        ]
+
+        self.index_data(data)
+        self.refresh()
+
+        results = list(self.get_s().values_list())
+        eq_(sorted(results[0], key=repr),
+            sorted(([1], [u'bob']), key=repr)
+        )
+
+    def test_values_dict_no_args_no_stored_fields(self):
+        self.cleanup_index()
+        self.create_index(
+            mappings={
+                self.mapping_type_name: {
+                    'id': {'type': 'integer'},
+                    'name': {'type': 'string'},
+                    'weight': {'type': 'integer'},
+                }
+            }
+        )
+        data = [
+            {'id': 1, 'name': 'bob', 'weight': 40}
+        ]
+
+        self.index_data(data)
+        self.refresh()
+
+        results = list(self.get_s().values_dict())
+        eq_(sorted(results[0].items()),
+            # Note: This is the _id of the document--not the "id" in
+            # the document.
+            sorted([('_id', [u'1']), ('_type', [u'elasticutilsmappingtype'])])
+        )
+
+    def test_values_dict_no_args_with_stored_fields(self):
+        # If there are no fields specified, then ES returns the fields
+        # marked as stored.
+        self.cleanup_index()
+        self.create_index(
+            mappings={
+                self.mapping_type_name: {
+                    'properties': {
+                        'id': {'type': 'integer', 'store': True},
+                        'name': {'type': 'string', 'store': True},
+                        'weight': {'type': 'integer'},
+                    }
+                }
+            }
+        )
+
+        data = [
+            {'id': 1, 'name': 'bob', 'weight': 40}
+        ]
+
+        self.index_data(data)
+        self.refresh()
+
+        results = list(self.get_s().values_dict())
+        eq_(sorted(results[0].items()),
+            sorted([(u'id', [1]), (u'name', [u'bob'])])
+        )


 class TestFakeMappingType(ESTestCase):
@ -175,13 +306,11 @@ class TestResultsWithDates(ESTestCase):
        """Datetime strings in ES results get converted to Python datetimes"""
        self.cleanup_index()
        self.create_index(
-            settings={
-                'mappings': {
-                    self.mapping_type_name: {
-                        'id': {'type': 'integer'},
-                        'bday': {'type': 'date', 'format': 'YYYY-mm-dd'},
-                        'btime': {'type': 'date'}
-                    }
+            mappings={
+                self.mapping_type_name: {
+                    'id': {'type': 'integer'},
+                    'bday': {'type': 'date', 'format': 'YYYY-mm-dd'},
+                    'btime': {'type': 'date'}
                }
            }
        )
@ -193,36 +322,34 @@ class TestResultsWithDates(ESTestCase):
        self.index_data(data)
        self.refresh()

-        results = list(self.get_s().values_dict())
+        results = list(self.get_s().values_dict('id', 'bday', 'btime'))
        eq_(results,
-            [{u'bday': datetime(2012, 12, 1, 0, 0),
-              u'btime': datetime(2012, 12, 1, 12, 0),
-              u'id': 1}]
+            [{u'bday': [datetime(2012, 12, 1, 0, 0)],
+              u'btime': [datetime(2012, 12, 1, 12, 0)],
+              u'id': [1]}]
        )

    def test_dates_lookalikes(self):
        """Datetime strings in ES results get converted to Python datetimes"""
        self.cleanup_index()
        self.create_index(
-            settings={
-                'mappings': {
-                    self.mapping_type_name: {
-                        'id': {'type': 'integer'},
-                        'bday': {'type': 'string', 'analyzer': 'keyword'}
-                    }
+            mappings={
+                self.mapping_type_name: {
+                    'id': {'type': 'integer'},
+                    'bday': {'type': 'string', 'analyzer': 'keyword'}
                }
            }
        )
        data = [
-            {'id': 1, 'bday': 'xxxx-xx-xxTxx:xx:xx'}
+            {'id': [1], 'bday': ['xxxx-xx-xxTxx:xx:xx']}
        ]

        self.index_data(data)
        self.refresh()

-        results = list(self.get_s().values_dict())
+        results = list(self.get_s().values_dict('id', 'bday'))
        eq_(results,
-            [{u'id': 1, u'bday': u'xxxx-xx-xxTxx:xx:xx'}]
+            [{u'id': [1], u'bday': [u'xxxx-xx-xxTxx:xx:xx']}]
        )