mm: Convert workingset to XArray

We construct an XA_STATE and use it to delete the node with xas_store()
rather than adding a special function for this unique use case. Includes
a test that simulates this usage for the test suite.

Signed-off-by: Matthew Wilcox <willy@infradead.org>

Parent: ff9c745b81
Commit: a97e7904c0
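As a reader's aid (not part of the patch): a minimal sketch of the deletion idiom the message describes, in the style of the shadow_remove() and shadow_lru_isolate() hunks below. The helper name erase_shadow_node() is invented for illustration; XA_STATE, xa_parent_locked(), xas_set_update() and xas_store() are the XArray primitives the patch itself uses, and the xarray lock is assumed to be held by the caller.

#include <linux/xarray.h>

/*
 * Illustrative only: remove a node that contains nothing but shadow
 * entries by pointing an XA_STATE at the slot @node occupies in its
 * parent and storing NULL over it at node granularity.
 */
static void erase_shadow_node(struct xarray *xa, struct xa_node *node)
{
	XA_STATE(xas, xa, 0);

	xas.xa_node = xa_parent_locked(xa, node);	/* the slot lives in the parent */
	xas.xa_offset = node->offset;			/* which slot of the parent */
	xas.xa_shift = node->shift + XA_CHUNK_SHIFT;	/* operate on the whole node */
	xas_set_update(&xas, workingset_update_node);	/* keep the shadow LRU in sync */
	xas_store(&xas, NULL);				/* unlinks the node from the tree */
}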
include/linux/swap.h

@@ -306,15 +306,6 @@ void workingset_update_node(struct xa_node *node);
 		xas_set_update(xas, workingset_update_node);		\
 } while (0)
 
-/* Returns workingset_update_node() if the mapping has shadow entries. */
-#define workingset_lookup_update(mapping)				\
-({									\
-	radix_tree_update_node_t __helper = workingset_update_node;	\
-	if (dax_mapping(mapping) || shmem_mapping(mapping))		\
-		__helper = NULL;					\
-	__helper;							\
-})
-
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
lib/test_xarray.c

@@ -863,6 +863,67 @@ static noinline void check_create_range(struct xarray *xa)
 	check_create_range_3();
 }
 
+static LIST_HEAD(shadow_nodes);
+
+static void test_update_node(struct xa_node *node)
+{
+	if (node->count && node->count == node->nr_values) {
+		if (list_empty(&node->private_list))
+			list_add(&shadow_nodes, &node->private_list);
+	} else {
+		if (!list_empty(&node->private_list))
+			list_del_init(&node->private_list);
+	}
+}
+
+static noinline void shadow_remove(struct xarray *xa)
+{
+	struct xa_node *node;
+
+	xa_lock(xa);
+	while ((node = list_first_entry_or_null(&shadow_nodes,
+					struct xa_node, private_list))) {
+		XA_STATE(xas, node->array, 0);
+		XA_BUG_ON(xa, node->array != xa);
+		list_del_init(&node->private_list);
+		xas.xa_node = xa_parent_locked(node->array, node);
+		xas.xa_offset = node->offset;
+		xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
+		xas_set_update(&xas, test_update_node);
+		xas_store(&xas, NULL);
+	}
+	xa_unlock(xa);
+}
+
+static noinline void check_workingset(struct xarray *xa, unsigned long index)
+{
+	XA_STATE(xas, xa, index);
+	xas_set_update(&xas, test_update_node);
+
+	do {
+		xas_lock(&xas);
+		xas_store(&xas, xa_mk_value(0));
+		xas_next(&xas);
+		xas_store(&xas, xa_mk_value(1));
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, GFP_KERNEL));
+
+	XA_BUG_ON(xa, list_empty(&shadow_nodes));
+
+	xas_lock(&xas);
+	xas_next(&xas);
+	xas_store(&xas, &xas);
+	XA_BUG_ON(xa, !list_empty(&shadow_nodes));
+
+	xas_store(&xas, xa_mk_value(2));
+	xas_unlock(&xas);
+	XA_BUG_ON(xa, list_empty(&shadow_nodes));
+
+	shadow_remove(xa);
+	XA_BUG_ON(xa, !list_empty(&shadow_nodes));
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
 static noinline void check_destroy(struct xarray *xa)
 {
 	unsigned long index;
@@ -916,6 +977,10 @@ static int xarray_checks(void)
 	check_create_range(&array);
 	check_store_iter(&array);
 
+	check_workingset(&array, 0);
+	check_workingset(&array, 64);
+	check_workingset(&array, 4096);
+
 	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
 	return (tests_run == tests_passed) ? 0 : -EINVAL;
 }
mm/workingset.c

@@ -148,7 +148,7 @@
  * and activations is maintained (node->inactive_age).
  *
  * On eviction, a snapshot of this counter (along with some bits to
- * identify the node) is stored in the now empty page cache radix tree
+ * identify the node) is stored in the now empty page cache
  * slot of the evicted page. This is called a shadow entry.
  *
  * On cache misses for which there are shadow entries, an eligible
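For orientation, a hedged sketch of what a shadow entry is at the XArray level: a small tagged integer stored with xa_mk_value() in the slot the evicted page used to occupy. The real encoding in this file (pack_shadow()/unpack_shadow()) packs the memcg id, the node id and the eviction counter into those bits; the helper below is illustrative only.

/* Illustrative only: stash a counter snapshot as a value entry in the
 * slot that held the evicted page. */
static inline void *make_shadow_sketch(unsigned long snapshot)
{
	return xa_mk_value(snapshot);	/* a tagged integer, not a page pointer */
}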
@@ -162,7 +162,7 @@
 
 /*
  * Eviction timestamps need to be able to cover the full range of
- * actionable refaults. However, bits are tight in the radix tree
+ * actionable refaults. However, bits are tight in the xarray
  * entry, and after storing the identifier for the lruvec there might
  * not be enough left to represent every single actionable refault. In
  * that case, we have to sacrifice granularity for distance, and group
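A hedged illustration of the granularity-for-distance trade-off described above: when the eviction counter no longer fits in the bits left in a value entry, its low bits are dropped so the counter still spans the whole actionable range, just in coarser buckets. bucket_order is the knob this file actually uses; nbits below is a stand-in for the bits that remain after the lruvec identifier.

/* Illustrative only: group evictions into buckets of 2^bucket_order so
 * the truncated counter still covers all actionable refault distances. */
static inline unsigned long pack_eviction_sketch(unsigned long eviction,
						 unsigned int bucket_order,
						 unsigned int nbits)
{
	return (eviction >> bucket_order) & ((1UL << nbits) - 1);
}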
@@ -339,7 +339,7 @@ out:
 
 static struct list_lru shadow_nodes;
 
-void workingset_update_node(struct radix_tree_node *node)
+void workingset_update_node(struct xa_node *node)
 {
 	/*
 	 * Track non-empty nodes that contain only shadow entries;
@@ -368,7 +368,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 	nodes = list_lru_shrink_count(&shadow_nodes, sc);
 
 	/*
-	 * Approximate a reasonable limit for the radix tree nodes
+	 * Approximate a reasonable limit for the nodes
 	 * containing shadow entries. We don't need to keep more
 	 * shadow entries than possible pages on the active list,
 	 * since refault distances bigger than that are dismissed.
@@ -383,11 +383,11 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 	 * worst-case density of 1/8th. Below that, not all eligible
 	 * refaults can be detected anymore.
 	 *
-	 * On 64-bit with 7 radix_tree_nodes per page and 64 slots
+	 * On 64-bit with 7 xa_nodes per page and 64 slots
 	 * each, this will reclaim shadow entries when they consume
 	 * ~1.8% of available memory:
 	 *
-	 * PAGE_SIZE / radix_tree_nodes / node_entries * 8 / PAGE_SIZE
+	 * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE
 	 */
 	if (sc->memcg) {
 		cache = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
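Worked out under the comment's own assumptions (7 xa_nodes per page, 64 slots per node, worst-case shadow density of 1/8th), the formula above evaluates to:

/*
 * PAGE_SIZE / 7 / 64 * 8 / PAGE_SIZE  ==  8 / 448  ~=  1 / 56,
 * i.e. shadow nodes are shrunk once they occupy roughly one page per
 * 56 pages of cache, about 1.8% of the memory counted in "cache".
 */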
@@ -396,7 +396,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 		cache = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) +
 			node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE);
 	}
-	max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3);
+	max_nodes = cache >> (XA_CHUNK_SHIFT - 3);
 
 	if (!nodes)
 		return SHRINK_EMPTY;
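Expanding the new shift, assuming the default XA_CHUNK_SHIFT of 6 (64-slot nodes; CONFIG_BASE_SMALL uses a smaller value), gives the same 1/8th density bound as the comment above:

/*
 * max_nodes = cache >> (XA_CHUNK_SHIFT - 3)
 *           = cache >> 3                       (64-slot nodes)
 * One node per 8 cache pages, at 8 shadow entries per node, allows
 * roughly one shadow entry per page of cache before shrinking starts.
 */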
@@ -409,11 +409,11 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 static enum lru_status shadow_lru_isolate(struct list_head *item,
 					  struct list_lru_one *lru,
 					  spinlock_t *lru_lock,
-					  void *arg)
+					  void *arg) __must_hold(lru_lock)
 {
+	struct xa_node *node = container_of(item, struct xa_node, private_list);
+	XA_STATE(xas, node->array, 0);
 	struct address_space *mapping;
-	struct radix_tree_node *node;
-	unsigned int i;
 	int ret;
 
 	/*
@@ -421,14 +421,13 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	 * the shadow node LRU under the i_pages lock and the
 	 * lru_lock. Because the page cache tree is emptied before
 	 * the inode can be destroyed, holding the lru_lock pins any
-	 * address_space that has radix tree nodes on the LRU.
+	 * address_space that has nodes on the LRU.
 	 *
 	 * We can then safely transition to the i_pages lock to
 	 * pin only the address_space of the particular node we want
 	 * to reclaim, take the node off-LRU, and drop the lru_lock.
 	 */
 
-	node = container_of(item, struct xa_node, private_list);
 	mapping = container_of(node->array, struct address_space, i_pages);
 
 	/* Coming from the list, invert the lock order */
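A hedged sketch of the lock-order inversion the comment refers to (the surrounding statements are outside this hunk, so the exact code below is illustrative): lru_lock is already held, the i_pages lock normally nests outside it, so only a trylock is safe here, and contention is resolved by asking the LRU walker to retry the item.

/* Illustrative only: attempt the inverted lock order, back off on
 * contention and retry the item later. */
if (!xa_trylock(&mapping->i_pages)) {
	spin_unlock_irq(lru_lock);
	cond_resched();
	spin_lock_irq(lru_lock);	/* callers expect lru_lock held on return */
	return LRU_RETRY;
}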
@@ -450,25 +449,17 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 		goto out_invalid;
 	if (WARN_ON_ONCE(node->count != node->nr_values))
 		goto out_invalid;
-	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
-		if (node->slots[i]) {
-			if (WARN_ON_ONCE(!xa_is_value(node->slots[i])))
-				goto out_invalid;
-			if (WARN_ON_ONCE(!node->nr_values))
-				goto out_invalid;
-			if (WARN_ON_ONCE(!mapping->nrexceptional))
-				goto out_invalid;
-			node->slots[i] = NULL;
-			node->nr_values--;
-			node->count--;
-			mapping->nrexceptional--;
-		}
-	}
-	if (WARN_ON_ONCE(node->nr_values))
-		goto out_invalid;
+	mapping->nrexceptional -= node->nr_values;
+	xas.xa_node = xa_parent_locked(&mapping->i_pages, node);
+	xas.xa_offset = node->offset;
+	xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
+	xas_set_update(&xas, workingset_update_node);
+	/*
+	 * We could store a shadow entry here which was the minimum of the
+	 * shadow entries we were tracking ...
+	 */
+	xas_store(&xas, NULL);
 	inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
-	__radix_tree_delete_node(&mapping->i_pages, node,
-				 workingset_lookup_update(mapping));
 
 out_invalid:
 	xa_unlock_irq(&mapping->i_pages);