Fix S's values_list/dict re: ES 1.0

This makes some pretty big changes.

1. .values_list() and .values_dict() will now **always** send the
   "fields" property to ES. If you don't specify any field arguments,
   then it sends "fields=[*]". If you do specify field arguments, then
   it sends that list of fields.

2. Elasticsearch 1.0 changed the shape of what gets returned--all
   values are lists when you specify the "fields" property. To
   make Elasticsearch 0.90 and Elasticsearch 1.0 consistent, we
   now convert any non-list values into list values for Elasticsearch
   0.90.

Wait, wat? Prior to this commit, you'd have the following:

    # Elasticsearch 0.90
    >>> list(S().values_list())
    [(501, 'frank'), (602, 'sam')]           # values are ints and strings

    >>> list(S().values_list('id', 'name'))
    [(501, 'frank'), (602, 'sam')]           # values are ints and strings

    # Elasticsearch 1.0
    >>> list(S().values_list())
    [(501, 'frank'), (602, 'sam')]           # values are ints and strings

    >>> list(S().values_list('id', 'name'))
    [([501], ['frank']), ([602], ['sam'])]   # values are all lists

After this change, we have this (assuming id and name fields are stored):

    # Elasticsearch 0.90
    >>> list(S().values_list())
    [([501], ['frank']), ([602], ['sam'])]

    >>> list(S().values_list('id', 'name'))
    [([501], ['frank']), ([602], ['sam'])]

    # Elasticsearch 1.0
    >>> list(S().values_list())
    [([501], ['frank']), ([602], ['sam'])]

    >>> list(S().values_list('id', 'name'))
    [([501], ['frank']), ([602], ['sam'])]

If you plan to call .values_list() and .values_dict() with no arguments,
you must add "store: True" to your mappings for the fields you want to
get back.
This commit is contained in:
Will Kahn-Greene 2014-08-13 15:08:03 -04:00
Родитель 14cc365519
Коммит 03f4094e51
7 изменённых файлов: 326 добавлений и 82 удалений

Просмотреть файл

@ -11,6 +11,34 @@ Version 0.10: In development
**API-breaking changes:**
* **big ``.values_list()`` and ``.values_dict()`` changes**
``.values_list()`` and ``.values_dict()`` will now **always** specify
the Elasticsearch ``fields`` property.
If you call these two functions with no arguments (i.e. you specify
no fields), they will send ``fields=*`` to Elasticsearch, which
returns any fields marked as stored in the document mapping. If you
have no fields marked as stored, then they will return the ``_id``
and ``_type`` of the result.
If you call these two functions with arguments (i.e. you specify
fields), then it'll return those fields---same as before.
However, they now return all values as lists. For example::
>>> S().values_list()
[([100], ['bob'], [40]), ...]
>>> S().values_list('id')
[([100],), ([101],), ...]
>>> S().values_dict()
[{'id': [100], 'name': ['bob'], 'weight': [40]}, ...]
>>> S().values_dict('id', 'name')
[{'id': [100], 'name': ['bob']}, ...]
* Removed ``text`` and ``text_phrase`` queries. They're renamed
in Elasticsearch to ``match`` and ``match_phrase``.

Просмотреть файл

@ -98,7 +98,7 @@ print s.facet_counts()
# }}
# Let's do a query for 'cookie' and do a facet count.
print s.query(title__text='cookie').facet_counts()
print s.query(title__match='cookie').facet_counts()
# Pretty-printed output:
# {u'product': {
# u'_type': u'terms',

Просмотреть файл

@ -79,7 +79,7 @@ print basic_s.count()
# Print articles with 'cookie' in the title.
print [item['title']
for item in basic_s.query(title__text='cookie')]
for item in basic_s.query(title__match='cookie')]
# Prints:
# [u'Deleting cookies', u'What is a cookie?',
# u'Websites say cookies are blocked - Unblock them']
@ -87,7 +87,7 @@ print [item['title']
# Print articles with 'cookie' in the title that are related to
# websites.
print [item['title']
for item in basic_s.query(title__text='cookie')
for item in basic_s.query(title__match='cookie')
.filter(topics='websites')]
# Prints:
# [u'Websites say cookies are blocked - Unblock them']
@ -100,7 +100,7 @@ print [item['title']
# Do a query and use the highlighter to denote the matching text.
print [(item['title'], item.es_meta.highlight['title'])
for item in basic_s.query(title__text='cookie').highlight('title')]
for item in basic_s.query(title__match='cookie').highlight('title')]
# Prints:
# [
# (u'Deleting cookies', [u'Deleting <em>cookies</em>']),

Просмотреть файл

@ -1,7 +1,6 @@
import copy
import logging
from datetime import datetime
from operator import itemgetter
import six
from six import string_types
@ -601,53 +600,61 @@ class S(PythonMixin):
return self._clone(next_step=('explain', value))
def values_list(self, *fields):
"""
Return a new S instance that returns ListSearchResults.
"""Return a new S instance that returns ListSearchResults.
:arg fields: the list of fields to have in the results.
With no arguments, returns a list of tuples of all the
data for that document.
With no arguments, passes ``fields=*`` and returns values
for any fields you have marked as "stored = True" for that
mapping.
With arguments, returns a list of tuples where the fields
in the tuple are in the order specified.
With arguments, passes those field arguments via
``fields`` and returns a list of tuples with values in the
order specified.
For example:
For example (assume id, name and age are stored fields):
>>> list(S().values_list())
[(1, 'fred', 40), (2, 'brian', 30), (3, 'james', 45)]
[([1], ['fred'], [40]), ([2], ['brian'], [30]), ...]
>>> list(S().values_list('id', 'name'))
[(1, 'fred'), (2, 'brian'), (3, 'james')]
[([1], ['fred']), ([2], ['brian']), ([3], ['james'])]
>>> list(S().values_list('name', 'id'))
[('fred', 1), ('brian', 2), ('james', 3)]
[(['fred'], [1]), (['brian'], [2]), (['james'], [3])]
.. Note::
If you don't specify fields, the data comes back in an
arbitrary order. It's probably best to specify fields or
use ``values_dict``.
If you do not specify any fields and you have no fields
marked as stored, then you will get back the ``_id`` and
``_type`` of each result and that's it.
"""
return self._clone(next_step=('values_list', fields))
def values_dict(self, *fields):
"""
Return a new S instance that returns DictSearchResults.
"""Return a new S instance that returns DictSearchResults.
:arg fields: the list of fields to have in the results.
With no arguments, this returns a list of dicts with all
the fields.
With no arguments, passes ``fields=*`` and returns values
for any fields you have marked as "stored = True" for that
mapping.
With arguments, it returns a list of dicts with the
specified fields.
With arguments, passes those field arguments via
``fields`` and returns a list of dicts with the specified
fields.
For example:
For example (assuming id, name and age are stored):
>>> list(S().values_dict())
[{'id': 1, 'name': 'fred', 'age': 40}, ...]
[{'id': [1], 'name': ['fred'], 'age': [40]}, ...]
>>> list(S().values_dict('id', 'name'))
[{'id': 1, 'name': 'fred'}, ...]
[{'id': [1], 'name': ['fred']}, ...]
.. Note::
If you do not specify any fields and you have no fields
marked as stored, then you will get back the ``_id`` and
``_type`` of each result and that's it.
"""
return self._clone(next_step=('values_dict', fields))
@ -1173,10 +1180,10 @@ class S(PythonMixin):
elif pq:
qs['query'] = pq
if as_list and list_fields:
fields = qs['fields'] = list(list_fields)
elif as_dict and dict_fields:
fields = qs['fields'] = list(dict_fields)
if as_list:
fields = qs['fields'] = list(list_fields) if list_fields else ['*']
elif as_dict:
fields = qs['fields'] = list(dict_fields) if dict_fields else ['*']
else:
fields = set()
@ -1816,10 +1823,29 @@ class DictSearchResults(SearchResults):
dict.
"""
def set_objects(self, results):
key = 'fields' if self.fields else '_source'
self.objects = [decorate_with_metadata(DictResult(r[key]), r)
def listify(d):
return dict([(key, val if isinstance(val, list) else [val])
for key, val in d.items()])
if results:
if 'fields' in results[0]:
objs = [(r['fields'], r) for r in results]
elif '_source' in results[0]:
objs = [(r['_source'], r) for r in results]
else:
# No fields and no source, so we just return _id and
# _type.
objs = [({'_id': r['_id'], '_type': r['_type']}, r)
for r in results]
else:
objs = []
# Decorate with metadata and listify values
self.objects = [decorate_with_metadata(DictResult(listify(obj)), r)
for obj, r in objs]
class ListSearchResults(SearchResults):
"""
@ -1827,19 +1853,26 @@ class ListSearchResults(SearchResults):
tuple.
"""
def set_objects(self, results):
if self.fields:
getter = itemgetter(*self.fields)
objs = [(getter(r['fields']), r) for r in results]
def listify(values):
return [(val if isinstance(val, list) else [val])
for val in values]
# itemgetter returns an item--not a tuple of one item--if
# there is only one thing in self.fields. Since we want
# this to always return a list of tuples, we need to fix
# that case here.
if len(self.fields) == 1:
objs = [((obj,), r) for obj, r in objs]
if results:
if 'fields' in results[0]:
objs = [(r['fields'].values(), r) for r in results]
elif '_source' in results[0]:
objs = [(r['_source'].values(), r) for r in results]
else:
# No fields and no source, so we just return _id and
# _type.
objs = [((r['_id'], r['_type']), r) for r in results]
else:
objs = [(r['_source'].values(), r) for r in results]
self.objects = [decorate_with_metadata(TupleResult(obj), r)
objs = []
# Decorate with metadata and listify values
self.objects = [decorate_with_metadata(TupleResult(listify(obj)), r)
for obj, r in objs]

Просмотреть файл

@ -53,7 +53,7 @@ class ESTestCase(TestCase):
# Note: TestCase has no setup_class, so we don't call super()
# here.
cls.cleanup_index()
cls.create_index(settings={'mappings': cls.mapping})
cls.create_index(mappings=cls.mapping)
if cls.data:
cls.index_data(cls.data)
cls.refresh()
@ -98,18 +98,16 @@ class ESTestCase(TestCase):
.doctypes(cls.mapping_type_name))
@classmethod
def create_index(cls, settings=None):
def create_index(cls, **kwargs):
"""Creates an index with specified settings
Uses ``cls.index_name`` as the index to create.
:arg settings: Any additional settings to use to create the
index.
:arg kwargs: Any additional args to put in the body like
"settings", "mappings", etc.
"""
body = {}
if settings:
body['settings'] = settings
body = kwargs if kwargs else {}
cls.get_es().indices.create(index=cls.index_name, body=body)
@classmethod

Просмотреть файл

@ -576,7 +576,7 @@ class QueryTest(ESTestCase):
# Test caching of empty results
try:
self.teardown_class()
self.create_index(settings={'mappings': self.mapping})
self.create_index(mappings=self.mapping)
self.refresh()
s = self.get_s()
@ -1501,6 +1501,64 @@ class SearchTypeTest(ESTestCase):
eq_(len(s[:1]), 2)
class ValuesTest(ESTestCase):
def test_values_list_chaining(self):
s = self.get_s()
s = s.values_list()
eqish_(s.build_search(),
{
'fields': ['*']
})
s = s.values_list('id')
eqish_(s.build_search(),
{
'fields': ['id']
})
# Add new fields
s = s.values_list('name', 'title')
eqish_(s.build_search(),
{
'fields': ['id', 'name', 'title']
})
# Fields don't show up more than once
s = s.values_list('id')
eqish_(s.build_search(),
{
'fields': ['id', 'name', 'title']
})
def test_values_dict_chaining(self):
s = self.get_s()
s = s.values_dict()
eqish_(s.build_search(),
{
'fields': ['*']
})
s = s.values_dict('id')
eqish_(s.build_search(),
{
'fields': ['id']
})
# Add new fields
s = s.values_dict('name', 'title')
eqish_(s.build_search(),
{
'fields': ['id', 'name', 'title']
})
# Fields don't show up more than once
s = s.values_dict('id')
eqish_(s.build_search(),
{
'fields': ['id', 'name', 'title']
})
class SuggestionTest(ESTestCase):
data = [
{'id': 1, 'name': 'bar'},

Просмотреть файл

@ -76,26 +76,45 @@ class TestResultsWithData(ESTestCase):
searcher = list(self.get_s(FakeMappingType).query(foo='bar'))
assert isinstance(searcher[0], FakeMappingType)
def test_values_dict_results(self):
def test_values_dict_no_fields(self):
"""With values_dict, return list of dicts."""
searcher = list(self.get_s().query(foo='bar').values_dict())
assert isinstance(searcher[0], dict)
def test_values_dict_results(self):
"""With values_dict, return list of dicts."""
searcher = list(self.get_s()
.query(foo='bar')
.values_dict('foo', 'width'))
assert isinstance(searcher[0], dict)
eq_(
sorted(searcher[0].items()),
sorted([(u'foo', [u'bar']), (u'width', [u'2'])])
)
def test_values_list_no_fields(self):
"""Specifying no fields with values_list defaults to ['id']."""
searcher = list(self.get_s().query(foo='bar').values_list())
"""Specifying no fields with values_list returns what's stored."""
searcher = list(self.get_s()
.query(foo='bar')
.values_list())
assert isinstance(searcher[0], tuple)
# We sort the result and expected result here so that the
# order is stable and comparable.
eq_(
sorted(searcher[0], key=str),
sorted((u'2', u'bar', u'awesome', 1), key=str))
sorted(searcher[0]),
sorted([[u'1'], [u'elasticutilsmappingtype']])
)
def test_values_list_results(self):
"""With values_list fields, returns list of tuples."""
searcher = list(self.get_s().query(foo='bar')
.values_list('foo', 'width'))
searcher = list(self.get_s()
.query(foo='bar')
.values_list('foo', 'width'))
assert isinstance(searcher[0], tuple)
eq_(
sorted(searcher[0]),
sorted(([u'2'], [u'bar']))
)
def test_default_results_form_has_metadata(self):
"""Test default results form has metadata."""
@ -135,14 +154,126 @@ class TestResultsWithData(ESTestCase):
eq_(S().query(fld1=2)
.values_dict()
.build_search(),
{"query": {"term": {"fld1": 2}}})
{"query": {"term": {"fld1": 2}}, 'fields': ['*']})
def test_values_list_no_args(self):
"""Calling values() with no args fetches only id."""
"""Calling values_list() with no args fetches all fields."""
eq_(S().query(fld1=2)
.values_list()
.build_search(),
{'query': {"term": {"fld1": 2}}})
{'query': {"term": {"fld1": 2}}, 'fields': ['*']})
class TestResultsWithStoredFields(ESTestCase):
def test_values_list_no_args_no_stored_fields(self):
# If there are no fields specified in the values_list() call
# and no stored fields for that document, then we pass in
# fields=['*'] and ES returns nothing, so we return the _id
# and _type.
self.cleanup_index()
self.create_index(
mappings={
self.mapping_type_name: {
'id': {'type': 'integer'},
'name': {'type': 'string'},
'weight': {'type': 'integer'},
}
}
)
data = [
{'id': 1, 'name': 'bob', 'weight': 40}
]
self.index_data(data)
self.refresh()
results = list(self.get_s().values_list())
eq_(sorted(results[0], key=repr),
# Note: This is the _id of the document--not the "id" in
# the document.
sorted(([u'1'], [u'elasticutilsmappingtype']), key=repr)
)
def test_values_list_no_args_with_stored_fields(self):
# If there are no fields specified, then ES returns the fields
# marked as stored.
self.cleanup_index()
self.create_index(
mappings={
self.mapping_type_name: {
'properties': {
'id': {'type': 'integer', 'store': True},
'name': {'type': 'string', 'store': True},
'weight': {'type': 'integer'},
}
}
}
)
data = [
{'id': 1, 'name': 'bob', 'weight': 40}
]
self.index_data(data)
self.refresh()
results = list(self.get_s().values_list())
eq_(sorted(results[0], key=repr),
sorted(([1], [u'bob']), key=repr)
)
def test_values_dict_no_args_no_stored_fields(self):
self.cleanup_index()
self.create_index(
mappings={
self.mapping_type_name: {
'id': {'type': 'integer'},
'name': {'type': 'string'},
'weight': {'type': 'integer'},
}
}
)
data = [
{'id': 1, 'name': 'bob', 'weight': 40}
]
self.index_data(data)
self.refresh()
results = list(self.get_s().values_dict())
eq_(sorted(results[0].items()),
# Note: This is the _id of the document--not the "id" in
# the document.
sorted([('_id', [u'1']), ('_type', [u'elasticutilsmappingtype'])])
)
def test_values_dict_no_args_with_stored_fields(self):
# If there are no fields specified, then ES returns the fields
# marked as stored.
self.cleanup_index()
self.create_index(
mappings={
self.mapping_type_name: {
'properties': {
'id': {'type': 'integer', 'store': True},
'name': {'type': 'string', 'store': True},
'weight': {'type': 'integer'},
}
}
}
)
data = [
{'id': 1, 'name': 'bob', 'weight': 40}
]
self.index_data(data)
self.refresh()
results = list(self.get_s().values_dict())
eq_(sorted(results[0].items()),
sorted([(u'id', [1]), (u'name', [u'bob'])])
)
class TestFakeMappingType(ESTestCase):
@ -175,13 +306,11 @@ class TestResultsWithDates(ESTestCase):
"""Datetime strings in ES results get converted to Python datetimes"""
self.cleanup_index()
self.create_index(
settings={
'mappings': {
self.mapping_type_name: {
'id': {'type': 'integer'},
'bday': {'type': 'date', 'format': 'YYYY-mm-dd'},
'btime': {'type': 'date'}
}
mappings={
self.mapping_type_name: {
'id': {'type': 'integer'},
'bday': {'type': 'date', 'format': 'YYYY-mm-dd'},
'btime': {'type': 'date'}
}
}
)
@ -193,36 +322,34 @@ class TestResultsWithDates(ESTestCase):
self.index_data(data)
self.refresh()
results = list(self.get_s().values_dict())
results = list(self.get_s().values_dict('id', 'bday', 'btime'))
eq_(results,
[{u'bday': datetime(2012, 12, 1, 0, 0),
u'btime': datetime(2012, 12, 1, 12, 0),
u'id': 1}]
[{u'bday': [datetime(2012, 12, 1, 0, 0)],
u'btime': [datetime(2012, 12, 1, 12, 0)],
u'id': [1]}]
)
def test_dates_lookalikes(self):
"""Datetime strings in ES results get converted to Python datetimes"""
self.cleanup_index()
self.create_index(
settings={
'mappings': {
self.mapping_type_name: {
'id': {'type': 'integer'},
'bday': {'type': 'string', 'analyzer': 'keyword'}
}
mappings={
self.mapping_type_name: {
'id': {'type': 'integer'},
'bday': {'type': 'string', 'analyzer': 'keyword'}
}
}
)
data = [
{'id': 1, 'bday': 'xxxx-xx-xxTxx:xx:xx'}
{'id': [1], 'bday': ['xxxx-xx-xxTxx:xx:xx']}
]
self.index_data(data)
self.refresh()
results = list(self.get_s().values_dict())
results = list(self.get_s().values_dict('id', 'bday'))
eq_(results,
[{u'id': 1, u'bday': u'xxxx-xx-xxTxx:xx:xx'}]
[{u'id': [1], u'bday': [u'xxxx-xx-xxTxx:xx:xx']}]
)