super-rad automatic caching and invalidation

This commit is contained in:
Jeff Balogh 2009-12-22 22:56:02 -05:00
Родитель 028f7ced30
Коммит be13194d2c
9 изменённых файлов: 397 добавлений и 0 удалений

201
apps/caching/__init__.py Normal file
Просмотреть файл

@ -0,0 +1,201 @@
"""
To enable caching for a model, add the :class:`~caching.CachingManager` to that
class. If you want related (foreign key) lookups to hit the cache,
``CachingManager`` must be the default manager. If you have multiple managers
that should be cached, return a :class:`~caching.CachingQuerySet` from the
other manager's ``get_query_set`` method instead of subclassing
``CachingManager``, since that would hook up the post_save and post_delete
signals multiple times.
Whenever you run a query, ``CachingQuerySet`` will try to find that query in
the cache. Queries are keyed by ``{locale}:{sql}``. If it's there, we return
the cached result set and everyone is happy. If the query isn't in the cache,
the normal codepath to run a database query is executed. As the objects in the
result set are iterated over, they are added to a list that will get cached
once iteration is done.
.. note::
Nothing will be cached if the QuerySet is not iterated through completely.
To support easy cache invalidation, we use "flush lists" to mark the cached
queries an object belongs to. That way, all queries where an object was found
will be invalidated when that object changes. Flush lists map an object key to
a list of query keys.
When an object is saved or deleted, all query keys in its flush list will be
deleted. In addition, the flush lists of its foreign key relations will be
cleared. To avoid stale foreign key relations, any cached objects will be
flushed when the object their foreign key points to is invalidated.
During cache invalidation, we explicitly set a None value instead of just
deleting so we don't have any race condtions where:
* Thread 1 -> Cache miss, get object from DB
* Thread 2 -> Object saved, deleted from cache
* Thread 1 -> Store (stale) object fetched from DB in cache
The foundations of this module were derived from `Mike Malone's`_
`django-caching`_.
.. _`Mike Malone's`: http://immike.net/
.. _django-caching: http://github.com/mmalone/django-caching/
"""
import hashlib
import logging
from django.conf import settings
from django.db import models
from django.db.models import signals
from django.db.models.sql import query
from django.utils import translation, encoding
from .backends import cache
FOREVER = 0
log = logging.getLogger('z.caching')
class CachingManager(models.Manager):
# Tell Django to use this manager when resolving foreign keys.
use_for_related_fields = True
def get_query_set(self):
return CachingQuerySet(self.model)
def contribute_to_class(self, cls, name):
signals.post_save.connect(self.post_save, sender=cls)
signals.post_delete.connect(self.post_delete, sender=cls)
return super(CachingManager, self).contribute_to_class(cls, name)
def post_save(self, instance, **kwargs):
log.debug('post_save signal for %s' % instance)
self.invalidate(instance)
def post_delete(self, instance, **kwargs):
log.debug('post_delete signal for %s' % instance)
self.invalidate(instance)
def invalidate(self, obj):
keys = [key for key in map(flush_key, obj._cache_keys())]
# Add other flush keys from the lists, which happens when a parent
# object includes a foreign key.
for flush_list in cache.get_many(*keys):
if flush_list is not None:
keys.extend(k for k in flush_list if k.startswith('flush:'))
flush = []
for flush_list in cache.get_many(*keys):
if flush_list is not None:
flush.extend(flush_list)
log.debug('invalidating %s' % keys)
log.debug('flushing %s' % flush)
cache.set_many(dict((k, None) for k in flush), 5)
cache.delete_many(*keys)
class CachingQuerySet(models.query.QuerySet):
def iterator(self):
try:
query_key = self._query_key()
except query.EmptyResultSet:
raise StopIteration
# Try to fetch from the cache.
cached = cache.get(query_key)
if cached is not None:
log.debug('cache hit: %s' % query_key)
for obj in cached:
obj.from_cache = True
yield obj
return
# Do the database query, cache it once we have all the objects.
superiter = super(CachingQuerySet, self).iterator()
to_cache = []
try:
while True:
obj = superiter.next()
obj.from_cache = False
to_cache.append(obj)
yield obj
except StopIteration:
self._cache_objects(to_cache)
raise
def _query_key(self):
"""Generate a cache key for this QuerySet."""
lang = translation.get_language()
key = '%s:%s' % (lang, self.query)
# memcached keys must be < 250 bytes and w/o whitespace, but it's nice
# to see the keys when using locmem.
if cache.scheme == 'memcached':
return hashlib.md5(key).hexdigest()
else:
return key
def _cache_objects(self, objects):
"""Cache query_key => objects, then update the flush lists."""
# Adding to the flush lists has a race condition: if simultaneous
# processes are adding to the same list, one of the query keys will be
# dropped. Using redis would be safer.
def add_to_flush_list(flush_keys, new_key):
"""Add new_key to all the flush lists keyed by flush_keys."""
flush_lists = cache.get_dict(*flush_keys)
for key, list_ in flush_lists.items():
if list_ is None:
flush_lists[key] = [new_key]
else:
list_.append(new_key)
cache.set_many(flush_lists)
query_key = self._query_key()
cache.add(query_key, objects, settings.CACHE_DURATION)
flush_keys = map(flush_key, objects)
add_to_flush_list(flush_keys, query_key)
for obj in objects:
obj_flush = flush_key(obj)
keys = map(flush_key, obj._cache_keys())
keys.remove(obj_flush)
add_to_flush_list(keys, obj_flush)
def flush_key(obj):
"""We put flush lists in the flush: namespace."""
key = obj if isinstance(obj, basestring) else obj.cache_key
return 'flush:%s' % key
class CachingMixin:
@property
def cache_key(self):
"""Return a cache key based on the object's primary key."""
return self._cache_key(self.pk)
@classmethod
def _cache_key(cls, pk):
"""
Return a string that uniquely identifies the object.
For the Addon class, with a pk of 2, we get "o:addons.addon:2".
"""
key_parts = ('o', cls._meta, pk)
return ':'.join(map(encoding.smart_unicode, key_parts))
def _cache_keys(self):
"""Return the cache key for self plus all related foreign keys."""
fks = dict((f, getattr(self, f.attname)) for f in self._meta.fields
if isinstance(f, models.ForeignKey))
keys = [fk.rel.to._cache_key(val) for fk, val in fks.items()
if val is not None]
return (self.cache_key,) + tuple(keys)

64
apps/caching/backends.py Normal file
Просмотреть файл

@ -0,0 +1,64 @@
from django.conf import settings
from django.core.cache import parse_backend_uri
from werkzeug.contrib import cache as wcache
BACKENDS = {
'memcached': wcache.MemcachedCache,
'locmem': wcache.SimpleCache,
'file': wcache.FileSystemCache,
'dummy': wcache.NullCache,
}
# Set up the cache using Django's URI scheme.
scheme, host, params = parse_backend_uri(settings.CACHE_BACKEND)
if host:
cache = BACKENDS[scheme](host.split(';'), **params)
else:
cache = BACKENDS[scheme](**params)
cache.scheme = scheme
class CacheFixer(cache.__class__):
def get(self, key, default=None):
# Werkzeug's get doesn't have a default.
val = super(CacheFixer, self).get(key)
return default if val is None else val
# Werkzeug's non-memcached backends don't handle a timeout of 0 correctly.
# In memcached, the object is cached forever, while the other backends will
# expire it immediately. We introduce Infinity to cache things forever.
if not isinstance(cache, wcache.MemcachedCache):
def add(self, key, value, timeout=None):
if timeout == 0:
timeout = Infinity
return super(CacheFixer, self).add(key, value, timeout)
def set(self, key, value, timeout=None):
if timeout == 0:
timeout = Infinity
return super(CacheFixer, self).set(key, value, timeout)
cache.__class__ = CacheFixer
class _Infinity(object):
"""Always compares greater than numbers."""
def __radd__(self, _):
return self
def __cmp__(self, o):
return 0 if self is o else 1
def __repr__(self):
return 'Infinity'
Infinity = _Infinity()
del _Infinity

Просмотреть файл

Просмотреть файл

@ -0,0 +1,70 @@
from nose.tools import eq_
from test_utils import ExtraAppTestCase
import caching
from testapp.models import Addon, User
class CachingTestCase(ExtraAppTestCase):
fixtures = ['testapp/test_cache.json']
extra_apps = ['caching.tests.testapp']
def setUp(self):
caching.cache.clear()
def test_flush_key(self):
"""flush_key should work for objects or strings."""
a = Addon.objects.get(id=1)
eq_(caching.flush_key(a), 'flush:%s' % a.cache_key)
eq_(caching.flush_key(a.cache_key), caching.flush_key(a))
def test_cache_key(self):
a = Addon.objects.get(id=1)
eq_(a.cache_key, 'o:testapp.addon:1')
eq_(a._cache_keys(), (a.cache_key, a.author1.cache_key,
a.author2.cache_key))
def test_cache(self):
"""Basic cache test: second get comes from cache."""
assert Addon.objects.get(id=1).from_cache is False
assert Addon.objects.get(id=1).from_cache is True
def test_invalidation(self):
assert Addon.objects.get(id=1).from_cache is False
a = [x for x in Addon.objects.all() if x.id == 1][0]
assert a.from_cache is False
assert Addon.objects.get(id=1).from_cache is True
a = [x for x in Addon.objects.all() if x.id == 1][0]
assert a.from_cache is True
a.save()
assert Addon.objects.get(id=1).from_cache is False
a = [x for x in Addon.objects.all() if x.id == 1][0]
assert a.from_cache is False
def test_fk_invalidation(self):
"""When an object is invalidated, its foreign keys get invalidated."""
a = Addon.objects.get(id=1)
assert User.objects.get(name='clouseroo').from_cache is False
a.save()
assert User.objects.get(name='clouseroo').from_cache is False
def test_fk_parent_invalidation(self):
"""When a foreign key changes, any parent objects get invalidated."""
assert Addon.objects.get(id=1).from_cache is False
a = Addon.objects.get(id=1)
assert a.from_cache is True
u = User.objects.get(id=a.author1.id)
assert u.from_cache is True
u.name = 'fffuuu'
u.save()
assert User.objects.get(id=a.author1.id).from_cache is False
a = Addon.objects.get(id=1)
assert a.from_cache is False
eq_(a.author1.name, 'fffuuu')

Просмотреть файл

Просмотреть файл

@ -0,0 +1,25 @@
[
{
"pk": 1,
"model": "testapp.user",
"fields": {
"name": "fliggy"
}
},
{
"pk": 2,
"model": "testapp.user",
"fields": {
"name": "clouseroo"
}
},
{
"pk": 1,
"model": "testapp.addon",
"fields": {
"author2": 1,
"author1": 2,
"val": 42
}
}
]

Просмотреть файл

@ -0,0 +1,17 @@
from django.db import models
from caching import CachingMixin, CachingManager
class User(CachingMixin, models.Model):
name = models.CharField(max_length=30)
objects = CachingManager()
class Addon(CachingMixin, models.Model):
val = models.IntegerField()
author1 = models.ForeignKey(User)
author2 = models.ForeignKey(User, related_name='author2_set')
objects = CachingManager()

18
docs/topics/caching.rst Normal file
Просмотреть файл

@ -0,0 +1,18 @@
.. _caching:
=============
Cache is King
=============
.. automodule:: caching
.. class:: caching.CachingManager
This :class:`manager <django.db.models.Manager>` always returns a
:class:`~caching.CachingQuerySet`, and hooks up ``post_save`` and
``post_delete`` signals to invalidate caches.
.. class:: caching.CachingQuerySet
Overrides the default :class:`~django.db.models.QuerySet` to fetch objects
from cache before hitting the database.

Просмотреть файл

@ -122,3 +122,5 @@ SUPPORTED_APPS = ('firefox', 'thunderbird', 'mobile', 'seamonkey',
SUPPORTED_NONAPPS = ('admin', 'developers', 'editors', 'localizers',
'statistics', )
DEFAULT_APP = 'firefox'
CACHE_DURATION = 60 # seconds