From d8c712ea471ce7a4fd1734ad2211adf8469ddddc Mon Sep 17 00:00:00 2001
From: Greg Thelen
Date: Thu, 31 Jul 2014 09:07:19 -0700
Subject: [PATCH 1/2] dm bufio: fully initialize shrinker

1d3d4437eae1 ("vmscan: per-node deferred work") added a flags field to
struct shrinker assuming that all shrinkers were zero filled.  The dm
bufio shrinker is not zero filled, which leaves arbitrary kmalloc() data
in flags.  So far the only defined flags bit is SHRINKER_NUMA_AWARE.
But there are proposed patches which add other bits to shrinker.flags
(e.g. memcg awareness).

Rather than simply initializing the shrinker, this patch uses kzalloc()
when allocating the dm_bufio_client to ensure that the embedded shrinker
and any other similar structures are zeroed.

This fixes theoretical over aggressive shrinking of dm bufio objects.
If the uninitialized dm_bufio_client.shrinker.flags contains
SHRINKER_NUMA_AWARE then shrink_slab() would call the dm shrinker for
each numa node rather than just once.  This has been broken since 3.12.

Signed-off-by: Greg Thelen
Acked-by: Mikulas Patocka
Signed-off-by: Mike Snitzer
Cc: stable@vger.kernel.org # v3.12+
---
 drivers/md/dm-bufio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 4e84095833db..d724459860d9 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1541,7 +1541,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 	BUG_ON(block_size < 1 << SECTOR_SHIFT ||
 	       (block_size & (block_size - 1)));
 
-	c = kmalloc(sizeof(*c), GFP_KERNEL);
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
 	if (!c) {
 		r = -ENOMEM;
 		goto bad_client;
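
For illustration only, here is a minimal userspace sketch (plain C, not
kernel code -- fake_client, fake_shrinker and FAKE_NUMA_AWARE are invented
stand-ins for dm_bufio_client, struct shrinker and SHRINKER_NUMA_AWARE) of
the failure mode the kzalloc() change prevents: a non-zeroing allocation,
like kmalloc(), can leave stray bits in an embedded flags word, while a
zeroing allocation, like kzalloc(), cannot.

#include <stdio.h>
#include <stdlib.h>

struct fake_shrinker {                  /* stand-in for struct shrinker */
	unsigned long flags;
};

struct fake_client {                    /* stand-in for struct dm_bufio_client */
	struct fake_shrinker shrinker;
};

#define FAKE_NUMA_AWARE (1UL << 0)      /* stand-in for SHRINKER_NUMA_AWARE */

int main(void)
{
	/* malloc(), like kmalloc(), returns uninitialized memory: flags may
	 * hold whatever bits happen to be there (often zero in a fresh
	 * process, but never guaranteed). */
	struct fake_client *a = malloc(sizeof(*a));

	/* calloc(), like kzalloc(), guarantees the whole object is zeroed. */
	struct fake_client *b = calloc(1, sizeof(*b));

	if (a)
		printf("uninitialized flags: %#lx (stray NUMA bit: %s)\n",
		       a->shrinker.flags,
		       (a->shrinker.flags & FAKE_NUMA_AWARE) ? "yes" : "no");
	if (b)
		printf("zeroed flags: %#lx\n", b->shrinker.flags);

	free(a);
	free(b);
	return 0;
}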

From 44fa816bb778edbab6b6ddaaf24908dd6295937e Mon Sep 17 00:00:00 2001
From: Anssi Hannula
Date: Fri, 1 Aug 2014 11:55:47 -0400
Subject: [PATCH 2/2] dm cache: fix race affecting dirty block count

nr_dirty is updated without locking, causing it to drift so that it is
non-zero (either a small positive integer, or a very large one when an
underflow occurs) even when there are no actual dirty blocks.  This was
due to a race between the workqueue and map function accessing nr_dirty
in parallel without proper protection.

People were seeing under runs due to a race on increment/decrement of
nr_dirty, see: https://lkml.org/lkml/2014/6/3/648

Fix this by using an atomic_t for nr_dirty.

Reported-by: roma1390@gmail.com
Signed-off-by: Anssi Hannula
Signed-off-by: Joe Thornber
Signed-off-by: Mike Snitzer
Cc: stable@vger.kernel.org
---
 drivers/md/dm-cache-target.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 5f054c44b485..2c63326638b6 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -231,7 +231,7 @@ struct cache {
 	/*
 	 * cache_size entries, dirty if set
 	 */
-	dm_cblock_t nr_dirty;
+	atomic_t nr_dirty;
 	unsigned long *dirty_bitset;
 
 	/*
@@ -492,7 +492,7 @@ static bool is_dirty(struct cache *cache, dm_cblock_t b)
 static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
 {
 	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
-		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1);
+		atomic_inc(&cache->nr_dirty);
 		policy_set_dirty(cache->policy, oblock);
 	}
 }
@@ -501,8 +501,7 @@ static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cbl
 {
 	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
 		policy_clear_dirty(cache->policy, oblock);
-		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1);
-		if (!from_cblock(cache->nr_dirty))
+		if (atomic_dec_return(&cache->nr_dirty) == 0)
 			dm_table_event(cache->ti->table);
 	}
 }
@@ -2269,7 +2268,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 	atomic_set(&cache->quiescing_ack, 0);
 
 	r = -ENOMEM;
-	cache->nr_dirty = 0;
+	atomic_set(&cache->nr_dirty, 0);
 	cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
 	if (!cache->dirty_bitset) {
 		*error = "could not allocate dirty bitset";
@@ -2808,7 +2807,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 
 	residency = policy_residency(cache->policy);
 
-	DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %llu ",
+	DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
 	       (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
 	       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
 	       (unsigned long long)nr_blocks_metadata,
@@ -2821,7 +2820,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 	       (unsigned) atomic_read(&cache->stats.write_miss),
 	       (unsigned) atomic_read(&cache->stats.demotion),
 	       (unsigned) atomic_read(&cache->stats.promotion),
-	       (unsigned long long) from_cblock(cache->nr_dirty));
+	       (unsigned long) atomic_read(&cache->nr_dirty));
 
 	if (writethrough_mode(&cache->features))
 		DMEMIT("1 writethrough ");
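
As a rough analogy to the change above (standalone C11, not dm-cache code --
worker(), ITERS and the counter names are invented for the example), the
sketch below shows why the plain counter drifted and why an atomic one does
not: two threads doing unsynchronized read-modify-write updates lose counts,
while atomic increments are indivisible.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define ITERS 1000000

static long plain_count;                /* unsynchronized, like the old nr_dirty */
static atomic_long atomic_count;        /* like the new atomic_t nr_dirty */

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < ITERS; i++) {
		plain_count++;                          /* racy: load, add, store */
		atomic_fetch_add(&atomic_count, 1);     /* indivisible update */
	}
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, worker, NULL);
	pthread_create(&t2, NULL, worker, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);

	/* The plain counter usually ends below 2*ITERS because concurrent
	 * increments overwrite each other; the atomic counter is exact. */
	printf("plain:  %ld\natomic: %ld\n",
	       plain_count, atomic_load(&atomic_count));
	return 0;
}

Built with something like "cc -pthread counter-race.c", the plain total
typically falls short of 2000000 while the atomic total is exactly 2000000,
which mirrors how the unprotected nr_dirty drifted while the atomic_t stays
consistent.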