radix-tree: delete radix_tree_range_tag_if_tagged()

This is an exceptionally complicated function with just one caller
(tag_pages_for_writeback).  We devote a large portion of the runtime of
the test suite to testing this one function which has one caller.  By
introducing the new function radix_tree_iter_tag_set(), we can eliminate
all of the complexity while keeping the performance.  The caller can now
use a fairly standard radix_tree_for_each() loop, and it doesn't need to
worry about tricksy things like 'start' wrapping.

The test suite continues to spend a large amount of time investigating
this function, but now it's testing the underlying primitives such as
radix_tree_iter_resume() and the radix_tree_for_each_tagged() iterator
which are also used by other parts of the kernel.

Link: http://lkml.kernel.org/r/1480369871-5271-57-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Matthew Wilcox 2016-12-14 15:08:55 -08:00 коммит произвёл Linus Torvalds
Родитель 478922e2b0
Коммит 268f42de71
9 изменённых файлов: 124 добавлений и 162 удалений

Просмотреть файл

@ -120,6 +120,41 @@ static inline bool radix_tree_empty(struct radix_tree_root *root)
return root->rnode == NULL;
}
/**
* struct radix_tree_iter - radix tree iterator state
*
* @index: index of current slot
* @next_index: one beyond the last index for this chunk
* @tags: bit-mask for tag-iterating
* @node: node that contains current slot
* @shift: shift for the node that holds our slots
*
* This radix tree iterator works in terms of "chunks" of slots. A chunk is a
* subinterval of slots contained within one radix tree leaf node. It is
* described by a pointer to its first slot and a struct radix_tree_iter
* which holds the chunk's position in the tree and its size. For tagged
* iteration radix_tree_iter also holds the slots' bit-mask for one chosen
* radix tree tag.
*/
struct radix_tree_iter {
unsigned long index;
unsigned long next_index;
unsigned long tags;
struct radix_tree_node *node;
#ifdef CONFIG_RADIX_TREE_MULTIORDER
unsigned int shift;
#endif
};
static inline unsigned int iter_shift(const struct radix_tree_iter *iter)
{
#ifdef CONFIG_RADIX_TREE_MULTIORDER
return iter->shift;
#else
return 0;
#endif
}
/**
* Radix-tree synchronization
*
@ -283,6 +318,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
unsigned long index, unsigned int tag);
int radix_tree_tag_get(struct radix_tree_root *root,
unsigned long index, unsigned int tag);
void radix_tree_iter_tag_set(struct radix_tree_root *root,
const struct radix_tree_iter *iter, unsigned int tag);
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items,
@ -291,10 +328,6 @@ unsigned int
radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
unsigned long first_index, unsigned int max_items,
unsigned int tag);
unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
unsigned long *first_indexp, unsigned long last_index,
unsigned long nr_to_tag,
unsigned int fromtag, unsigned int totag);
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
static inline void radix_tree_preload_end(void)
@ -302,39 +335,6 @@ static inline void radix_tree_preload_end(void)
preempt_enable();
}
/**
* struct radix_tree_iter - radix tree iterator state
*
* @index: index of current slot
* @next_index: one beyond the last index for this chunk
* @tags: bit-mask for tag-iterating
* @shift: shift for the node that holds our slots
*
* This radix tree iterator works in terms of "chunks" of slots. A chunk is a
* subinterval of slots contained within one radix tree leaf node. It is
* described by a pointer to its first slot and a struct radix_tree_iter
* which holds the chunk's position in the tree and its size. For tagged
* iteration radix_tree_iter also holds the slots' bit-mask for one chosen
* radix tree tag.
*/
struct radix_tree_iter {
unsigned long index;
unsigned long next_index;
unsigned long tags;
#ifdef CONFIG_RADIX_TREE_MULTIORDER
unsigned int shift;
#endif
};
static inline unsigned int iter_shift(struct radix_tree_iter *iter)
{
#ifdef CONFIG_RADIX_TREE_MULTIORDER
return iter->shift;
#else
return 0;
#endif
}
#define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */
#define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */
#define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */

Просмотреть файл

@ -219,6 +219,11 @@ radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
return RADIX_TREE_MAP_SIZE;
}
static unsigned int iter_offset(const struct radix_tree_iter *iter)
{
return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
}
/*
* The maximum index which can be stored in a radix tree
*/
@ -1014,6 +1019,18 @@ static void node_tag_set(struct radix_tree_root *root,
root_tag_set(root, tag);
}
/**
* radix_tree_iter_tag_set - set a tag on the current iterator entry
* @root: radix tree root
* @iter: iterator state
* @tag: tag to set
*/
void radix_tree_iter_tag_set(struct radix_tree_root *root,
const struct radix_tree_iter *iter, unsigned int tag)
{
node_tag_set(root, iter->node, tag, iter_offset(iter));
}
/**
* radix_tree_tag_clear - clear a tag on a radix tree node
* @root: radix tree root
@ -1164,6 +1181,7 @@ void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
if (node == RADIX_TREE_RETRY)
return slot;
node = entry_to_node(node);
iter->node = node;
iter->shift = node->shift;
if (flags & RADIX_TREE_ITER_TAGGED) {
@ -1266,6 +1284,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
iter->index = index;
iter->next_index = maxindex + 1;
iter->tags = 1;
iter->node = NULL;
__set_iter_shift(iter, 0);
return (void **)&root->rnode;
}
@ -1308,6 +1327,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
/* Update the iterator state */
iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
iter->next_index = (index | node_maxindex(node)) + 1;
iter->node = node;
__set_iter_shift(iter, node->shift);
if (flags & RADIX_TREE_ITER_TAGGED)
@ -1317,103 +1337,6 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
}
EXPORT_SYMBOL(radix_tree_next_chunk);
/**
* radix_tree_range_tag_if_tagged - for each item in given range set given
* tag if item has another tag set
* @root: radix tree root
* @first_indexp: pointer to a starting index of a range to scan
* @last_index: last index of a range to scan
* @nr_to_tag: maximum number items to tag
* @iftag: tag index to test
* @settag: tag index to set if tested tag is set
*
* This function scans range of radix tree from first_index to last_index
* (inclusive). For each item in the range if iftag is set, the function sets
* also settag. The function stops either after tagging nr_to_tag items or
* after reaching last_index.
*
* The tags must be set from the leaf level only and propagated back up the
* path to the root. We must do this so that we resolve the full path before
* setting any tags on intermediate nodes. If we set tags as we descend, then
* we can get to the leaf node and find that the index that has the iftag
* set is outside the range we are scanning. This reults in dangling tags and
* can lead to problems with later tag operations (e.g. livelocks on lookups).
*
* The function returns the number of leaves where the tag was set and sets
* *first_indexp to the first unscanned index.
* WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
* be prepared to handle that.
*/
unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
unsigned long *first_indexp, unsigned long last_index,
unsigned long nr_to_tag,
unsigned int iftag, unsigned int settag)
{
struct radix_tree_node *node, *child;
unsigned long maxindex;
unsigned long tagged = 0;
unsigned long index = *first_indexp;
radix_tree_load_root(root, &child, &maxindex);
last_index = min(last_index, maxindex);
if (index > last_index)
return 0;
if (!nr_to_tag)
return 0;
if (!root_tag_get(root, iftag)) {
*first_indexp = last_index + 1;
return 0;
}
if (!radix_tree_is_internal_node(child)) {
*first_indexp = last_index + 1;
root_tag_set(root, settag);
return 1;
}
node = entry_to_node(child);
for (;;) {
unsigned offset = radix_tree_descend(node, &child, index);
if (!child)
goto next;
if (!tag_get(node, iftag, offset))
goto next;
/* Sibling slots never have tags set on them */
if (radix_tree_is_internal_node(child)) {
node = entry_to_node(child);
continue;
}
tagged++;
node_tag_set(root, node, settag, offset);
next:
/* Go to next entry in node */
index = ((index >> node->shift) + 1) << node->shift;
/* Overflow can happen when last_index is ~0UL... */
if (index > last_index || !index)
break;
offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
while (offset == 0) {
/*
* We've fully scanned this node. Go up. Because
* last_index is guaranteed to be in the tree, what
* we do below cannot wander astray.
*/
node = node->parent;
offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
}
if (is_sibling_entry(node, node->slots[offset]))
goto next;
if (tagged >= nr_to_tag)
break;
}
*first_indexp = index;
return tagged;
}
EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
/**
* radix_tree_gang_lookup - perform multiple lookup on a radix tree
* @root: radix tree root

Просмотреть файл

@ -2106,18 +2106,26 @@ void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
#define WRITEBACK_TAG_BATCH 4096
unsigned long tagged;
unsigned long tagged = 0;
struct radix_tree_iter iter;
void **slot;
do {
spin_lock_irq(&mapping->tree_lock);
tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
&start, end, WRITEBACK_TAG_BATCH,
PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
spin_lock_irq(&mapping->tree_lock);
radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, start,
PAGECACHE_TAG_DIRTY) {
if (iter.index > end)
break;
radix_tree_iter_tag_set(&mapping->page_tree, &iter,
PAGECACHE_TAG_TOWRITE);
tagged++;
if ((tagged % WRITEBACK_TAG_BATCH) != 0)
continue;
slot = radix_tree_iter_resume(slot, &iter);
spin_unlock_irq(&mapping->tree_lock);
WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
cond_resched();
/* We check 'start' to handle wrapping when end == ~0UL */
} while (tagged >= WRITEBACK_TAG_BATCH && start);
spin_lock_irq(&mapping->tree_lock);
}
spin_unlock_irq(&mapping->tree_lock);
}
EXPORT_SYMBOL(tag_pages_for_writeback);

Просмотреть файл

@ -205,8 +205,7 @@ void copy_tag_check(void)
}
// printf("\ncopying tags...\n");
cur = start;
tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, ITEMS, 0, 1);
tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1);
// printf("checking copied tags\n");
assert(tagged == count);
@ -214,16 +213,13 @@ void copy_tag_check(void)
/* Copy tags in several rounds */
// printf("\ncopying tags...\n");
cur = start;
do {
tmp = rand() % (count/10+2);
tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, tmp, 0, 2);
} while (tmp == tagged);
tmp = rand() % (count / 10 + 2);
tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2);
assert(tagged == count);
// printf("%lu %lu %lu\n", tagged, tmp, count);
// printf("checking copied tags\n");
check_copied_tags(&tree, start, end, idx, ITEMS, 0, 2);
assert(tagged < tmp);
verify_tag_consistency(&tree, 0);
verify_tag_consistency(&tree, 1);
verify_tag_consistency(&tree, 2);

Просмотреть файл

@ -26,7 +26,6 @@ static void __multiorder_tag_test(int index, int order)
{
RADIX_TREE(tree, GFP_KERNEL);
int base, err, i;
unsigned long first = 0;
/* our canonical entry */
base = index & ~((1 << order) - 1);
@ -60,7 +59,7 @@ static void __multiorder_tag_test(int index, int order)
assert(!radix_tree_tag_get(&tree, i, 1));
}
assert(radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, 10, 0, 1) == 1);
assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1);
assert(radix_tree_tag_clear(&tree, index, 0));
for_each_index(i, base, order) {
@ -251,7 +250,6 @@ void multiorder_tagged_iteration(void)
RADIX_TREE(tree, GFP_KERNEL);
struct radix_tree_iter iter;
void **slot;
unsigned long first = 0;
int i, j;
printf("Multiorder tagged iteration test\n");
@ -296,8 +294,8 @@ void multiorder_tagged_iteration(void)
}
}
radix_tree_range_tag_if_tagged(&tree, &first, ~0UL,
MT_NUM_ENTRIES, 1, 2);
assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) ==
TAG_ENTRIES);
for (j = 0; j < 256; j++) {
int mask, k;
@ -323,9 +321,8 @@ void multiorder_tagged_iteration(void)
}
}
first = 1;
radix_tree_range_tag_if_tagged(&tree, &first, ~0UL,
MT_NUM_ENTRIES, 1, 0);
assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0)
== TAG_ENTRIES);
i = 0;
radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) {
assert(iter.index == tag_index[i]);

Просмотреть файл

@ -50,6 +50,7 @@
#include <stdio.h>
#include "regression.h"
#include "test.h"
#define PAGECACHE_TAG_DIRTY 0
#define PAGECACHE_TAG_WRITEBACK 1
@ -90,7 +91,7 @@ void regression2_test(void)
/* 1. */
start = 0;
end = max_slots - 2;
radix_tree_range_tag_if_tagged(&mt_tree, &start, end, 1,
tag_tagged_items(&mt_tree, NULL, start, end, 1,
PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
/* 2. */

Просмотреть файл

@ -23,7 +23,7 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag)
item_tag_set(tree, index, tag);
ret = item_tag_get(tree, index, tag);
assert(ret != 0);
ret = radix_tree_range_tag_if_tagged(tree, &first, ~0UL, 10, tag, !tag);
ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag);
assert(ret == 1);
ret = item_tag_get(tree, index, !tag);
assert(ret != 0);
@ -320,7 +320,7 @@ static void single_check(void)
assert(ret == 0);
verify_tag_consistency(&tree, 0);
verify_tag_consistency(&tree, 1);
ret = radix_tree_range_tag_if_tagged(&tree, &first, 10, 10, 0, 1);
ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1);
assert(ret == 1);
ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1);
assert(ret == 1);

Просмотреть файл

@ -151,6 +151,40 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
assert(nfound == 0);
}
/* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */
int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock,
unsigned long start, unsigned long end, unsigned batch,
unsigned iftag, unsigned thentag)
{
unsigned long tagged = 0;
struct radix_tree_iter iter;
void **slot;
if (batch == 0)
batch = 1;
if (lock)
pthread_mutex_lock(lock);
radix_tree_for_each_tagged(slot, root, &iter, start, iftag) {
if (iter.index > end)
break;
radix_tree_iter_tag_set(root, &iter, thentag);
tagged++;
if ((tagged % batch) != 0)
continue;
slot = radix_tree_iter_resume(slot, &iter);
if (lock) {
pthread_mutex_unlock(lock);
rcu_barrier();
pthread_mutex_lock(lock);
}
}
if (lock)
pthread_mutex_unlock(lock);
return tagged;
}
/* Use the same pattern as find_swap_entry() in mm/shmem.c */
unsigned long find_item(struct radix_tree_root *root, void *item)
{

Просмотреть файл

@ -25,6 +25,9 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
unsigned long nr, int chunk);
void item_kill_tree(struct radix_tree_root *root);
int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *,
unsigned long start, unsigned long end, unsigned batch,
unsigned iftag, unsigned thentag);
unsigned long find_item(struct radix_tree_root *, void *item);
void tag_check(void);