XArray: add xas_split
In order to use multi-index entries for huge pages in the page cache, we
need to be able to split a multi-index entry (e.g. if a file is truncated
in the middle of a huge page entry).  This version does not support
splitting more than one level of the tree at a time.  This is an
acceptable limitation for the page cache as we do not expect to support
order-12 pages in the near future.

[akpm@linux-foundation.org: export xas_split_alloc() to modules]
[willy@infradead.org: fix xarray split]
  Link: https://lkml.kernel.org/r/20200910175450.GV6583@casper.infradead.org
[willy@infradead.org: fix xarray]
  Link: https://lkml.kernel.org/r/20201001233943.GW20115@casper.infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Qian Cai <cai@lca.pw>
Cc: Song Liu <songliubraving@fb.com>
Link: https://lkml.kernel.org/r/20200903183029.14930-3-willy@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Parent: 57417cebc9
Commit: 8fc75643c5
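
As background for reading the diff, the intended calling pattern (also
spelled out in the documentation hunk below) is: allocate outside the lock,
then split under it.  A minimal sketch, not taken from this patch; the
page-cache mapping, the order variables, and the error handling here are
illustrative assumptions:

	/* Hypothetical caller, e.g. truncating in the middle of a huge
	 * page.  old_order is the current order of the entry being
	 * split; XA_STATE_ORDER sets up the state for the smaller
	 * new_order entries we want afterwards.  entry is the value to
	 * be written into every replacement slot (often the old entry).
	 */
	XA_STATE_ORDER(xas, &mapping->i_pages, index, new_order);

	xas_split_alloc(&xas, entry, old_order, GFP_KERNEL);	/* may sleep */
	if (xas_error(&xas))
		return xas_error(&xas);	/* -ENOMEM: nodes not allocated */

	xas_lock(&xas);
	xas_split(&xas, entry, old_order);	/* rewrites slots under xa_lock */
	xas_unlock(&xas);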
Documentation/core-api/xarray.rst

@@ -475,13 +475,15 @@ or iterations will move the index to the first index in the range.
 Each entry will only be returned once, no matter how many indices it
 occupies.
 
-Using xas_next() or xas_prev() with a multi-index xa_state
-is not supported.  Using either of these functions on a multi-index entry
-will reveal sibling entries; these should be skipped over by the caller.
+Using xas_next() or xas_prev() with a multi-index xa_state is not
+supported.  Using either of these functions on a multi-index entry will
+reveal sibling entries; these should be skipped over by the caller.
 
-Storing ``NULL`` into any index of a multi-index entry will set the entry
-at every index to ``NULL`` and dissolve the tie.  Splitting a multi-index
-entry into entries occupying smaller ranges is not yet supported.
+Storing ``NULL`` into any index of a multi-index entry will set the
+entry at every index to ``NULL`` and dissolve the tie.  A multi-index
+entry can be split into entries occupying smaller ranges by calling
+xas_split_alloc() without the xa_lock held, followed by taking the lock
+and calling xas_split().
 
 Functions and structures
 ========================
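
The dissolve behaviour documented above predates this patch; a minimal
sketch of it, using the public xa_* API with a hypothetical payload value
(this snippet is illustrative, not from the patch):

	DEFINE_XARRAY(xa);
	void *ptr = xa_mk_value(42);	/* arbitrary payload */

	/* One multi-index entry covering indices 0-15. */
	xa_store_range(&xa, 0, 15, ptr, GFP_KERNEL);
	/* Storing NULL at any covered index clears the whole range. */
	xa_store(&xa, 5, NULL, GFP_KERNEL);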

include/linux/xarray.h

@@ -1507,11 +1507,24 @@ void xas_create_range(struct xa_state *);
 
 #ifdef CONFIG_XARRAY_MULTI
 int xa_get_order(struct xarray *, unsigned long index);
+void xas_split(struct xa_state *, void *entry, unsigned int order);
+void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
 #else
 static inline int xa_get_order(struct xarray *xa, unsigned long index)
 {
 	return 0;
 }
+
+static inline void xas_split(struct xa_state *xas, void *entry,
+		unsigned int order)
+{
+	xas_store(xas, entry);
+}
+
+static inline void xas_split_alloc(struct xa_state *xas, void *entry,
+		unsigned int order, gfp_t gfp)
+{
+}
 #endif
 
 /**
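
Note the !CONFIG_XARRAY_MULTI stubs above: without multi-index support
every entry occupies exactly one index, so a "split" degenerates to
storing the new entry over the old one, which is why the xas_split() stub
simply calls xas_store() and xas_split_alloc() becomes a no-op.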

lib/test_xarray.c

@@ -1503,6 +1503,49 @@ static noinline void check_store_range(struct xarray *xa)
 	}
 }
 
+#ifdef CONFIG_XARRAY_MULTI
+static void check_split_1(struct xarray *xa, unsigned long index,
+			unsigned int order)
+{
+	XA_STATE(xas, xa, index);
+	void *entry;
+	unsigned int i = 0;
+
+	xa_store_order(xa, index, order, xa, GFP_KERNEL);
+
+	xas_split_alloc(&xas, xa, order, GFP_KERNEL);
+	xas_lock(&xas);
+	xas_split(&xas, xa, order);
+	xas_unlock(&xas);
+
+	xa_for_each(xa, index, entry) {
+		XA_BUG_ON(xa, entry != xa);
+		i++;
+	}
+	XA_BUG_ON(xa, i != 1 << order);
+
+	xa_set_mark(xa, index, XA_MARK_0);
+	XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
+
+	xa_destroy(xa);
+}
+
+static noinline void check_split(struct xarray *xa)
+{
+	unsigned int order;
+
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) {
+		check_split_1(xa, 0, order);
+		check_split_1(xa, 1UL << order, order);
+		check_split_1(xa, 3UL << order, order);
+	}
+}
+#else
+static void check_split(struct xarray *xa) { }
+#endif
+
 static void check_align_1(struct xarray *xa, char *name)
 {
 	int i;

@@ -1729,6 +1772,7 @@ static int xarray_checks(void)
 	check_store_range(&array);
 	check_store_iter(&array);
 	check_align(&xa0);
+	check_split(&array);
 
 	check_workingset(&array, 0);
 	check_workingset(&array, 64);

lib/xarray.c (168 changed lines)

@@ -266,13 +266,14 @@ static void xa_node_free(struct xa_node *node)
  */
 static void xas_destroy(struct xa_state *xas)
 {
-	struct xa_node *node = xas->xa_alloc;
+	struct xa_node *next, *node = xas->xa_alloc;
 
-	if (!node)
-		return;
-	XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
-	kmem_cache_free(radix_tree_node_cachep, node);
-	xas->xa_alloc = NULL;
+	while (node) {
+		XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+		next = rcu_dereference_raw(node->parent);
+		radix_tree_node_rcu_free(&node->rcu_head);
+		xas->xa_alloc = node = next;
+	}
 }
 
 /**

@@ -304,6 +305,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp)
 	xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
 	if (!xas->xa_alloc)
 		return false;
+	xas->xa_alloc->parent = NULL;
 	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
 	xas->xa_node = XAS_RESTART;
 	return true;

@@ -339,6 +341,7 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
 	}
 	if (!xas->xa_alloc)
 		return false;
+	xas->xa_alloc->parent = NULL;
 	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
 	xas->xa_node = XAS_RESTART;
 	return true;

@@ -403,7 +406,7 @@ static unsigned long xas_size(const struct xa_state *xas)
 /*
  * Use this to calculate the maximum index that will need to be created
  * in order to add the entry described by @xas.  Because we cannot store a
- * multiple-index entry at index 0, the calculation is a little more complex
+ * multi-index entry at index 0, the calculation is a little more complex
  * than you might expect.
  */
 static unsigned long xas_max(struct xa_state *xas)

@@ -946,6 +949,153 @@ void xas_init_marks(const struct xa_state *xas)
 }
 EXPORT_SYMBOL_GPL(xas_init_marks);
 
+#ifdef CONFIG_XARRAY_MULTI
+static unsigned int node_get_marks(struct xa_node *node, unsigned int offset)
+{
+	unsigned int marks = 0;
+	xa_mark_t mark = XA_MARK_0;
+
+	for (;;) {
+		if (node_get_mark(node, offset, mark))
+			marks |= 1 << (__force unsigned int)mark;
+		if (mark == XA_MARK_MAX)
+			break;
+		mark_inc(mark);
+	}
+
+	return marks;
+}
+
+static void node_set_marks(struct xa_node *node, unsigned int offset,
+			struct xa_node *child, unsigned int marks)
+{
+	xa_mark_t mark = XA_MARK_0;
+
+	for (;;) {
+		if (marks & (1 << (__force unsigned int)mark)) {
+			node_set_mark(node, offset, mark);
+			if (child)
+				node_mark_all(child, mark);
+		}
+		if (mark == XA_MARK_MAX)
+			break;
+		mark_inc(mark);
+	}
+}
+
+/**
+ * xas_split_alloc() - Allocate memory for splitting an entry.
+ * @xas: XArray operation state.
+ * @entry: New entry which will be stored in the array.
+ * @order: New entry order.
+ * @gfp: Memory allocation flags.
+ *
+ * This function should be called before calling xas_split().
+ * If necessary, it will allocate new nodes (and fill them with @entry)
+ * to prepare for the upcoming split of an entry of @order size into
+ * entries of the order stored in the @xas.
+ *
+ * Context: May sleep if @gfp flags permit.
+ */
+void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
+		gfp_t gfp)
+{
+	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
+	unsigned int mask = xas->xa_sibs;
+
+	/* XXX: no support for splitting really large entries yet */
+	if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order))
+		goto nomem;
+	if (xas->xa_shift + XA_CHUNK_SHIFT > order)
+		return;
+
+	do {
+		unsigned int i;
+		void *sibling;
+		struct xa_node *node;
+
+		node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+		if (!node)
+			goto nomem;
+		node->array = xas->xa;
+		for (i = 0; i < XA_CHUNK_SIZE; i++) {
+			if ((i & mask) == 0) {
+				RCU_INIT_POINTER(node->slots[i], entry);
+				sibling = xa_mk_sibling(0);
+			} else {
+				RCU_INIT_POINTER(node->slots[i], sibling);
+			}
+		}
+		RCU_INIT_POINTER(node->parent, xas->xa_alloc);
+		xas->xa_alloc = node;
+	} while (sibs-- > 0);
+
+	return;
+nomem:
+	xas_destroy(xas);
+	xas_set_err(xas, -ENOMEM);
+}
+EXPORT_SYMBOL_GPL(xas_split_alloc);
+
+/**
+ * xas_split() - Split a multi-index entry into smaller entries.
+ * @xas: XArray operation state.
+ * @entry: New entry to store in the array.
+ * @order: New entry order.
+ *
+ * The value in the entry is copied to all the replacement entries.
+ *
+ * Context: Any context.  The caller should hold the xa_lock.
+ */
+void xas_split(struct xa_state *xas, void *entry, unsigned int order)
+{
+	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
+	unsigned int offset, marks;
+	struct xa_node *node;
+	void *curr = xas_load(xas);
+	int values = 0;
+
+	node = xas->xa_node;
+	if (xas_top(node))
+		return;
+
+	marks = node_get_marks(node, xas->xa_offset);
+
+	offset = xas->xa_offset + sibs;
+	do {
+		if (xas->xa_shift < node->shift) {
+			struct xa_node *child = xas->xa_alloc;
+
+			xas->xa_alloc = rcu_dereference_raw(child->parent);
+			child->shift = node->shift - XA_CHUNK_SHIFT;
+			child->offset = offset;
+			child->count = XA_CHUNK_SIZE;
+			child->nr_values = xa_is_value(entry) ?
+					XA_CHUNK_SIZE : 0;
+			RCU_INIT_POINTER(child->parent, node);
+			node_set_marks(node, offset, child, marks);
+			rcu_assign_pointer(node->slots[offset],
+					xa_mk_node(child));
+			if (xa_is_value(curr))
+				values--;
+		} else {
+			unsigned int canon = offset - xas->xa_sibs;
+
+			node_set_marks(node, canon, NULL, marks);
+			rcu_assign_pointer(node->slots[canon], entry);
+			while (offset > canon)
+				rcu_assign_pointer(node->slots[offset--],
+						xa_mk_sibling(canon));
+			values += (xa_is_value(entry) - xa_is_value(curr)) *
+					(xas->xa_sibs + 1);
+		}
+	} while (offset-- > xas->xa_offset);
+
+	node->nr_values += values;
+}
+EXPORT_SYMBOL_GPL(xas_split);
+#endif
+
 /**
  * xas_pause() - Pause a walk to drop a lock.
  * @xas: XArray operation state.

@@ -1407,7 +1557,7 @@ EXPORT_SYMBOL(__xa_store);
 * @gfp: Memory allocation flags.
 *
 * After this function returns, loads from this index will return @entry.
-* Storing into an existing multislot entry updates the entry of every index.
+* Storing into an existing multi-index entry updates the entry of every index.
 * The marks associated with @index are unaffected unless @entry is %NULL.
 *
 * Context: Any context.  Takes and releases the xa_lock.

@@ -1549,7 +1699,7 @@ static void xas_set_range(struct xa_state *xas, unsigned long first,
 *
 * After this function returns, loads from any index between @first and @last,
 * inclusive will return @entry.
-* Storing into an existing multislot entry updates the entry of every index.
+* Storing into an existing multi-index entry updates the entry of every index.
 * The marks associated with @index are unaffected unless @entry is %NULL.
 *
 * Context: Process context.  Takes and releases the xa_lock.  May sleep
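
A worked example of the allocation arithmetic in xas_split_alloc() above
(XA_CHUNK_SHIFT is 6 on default configurations; that value is background
knowledge, not part of this patch): splitting an order-9 entry down to
order 0 gives sibs = (1 << (9 % 6)) - 1 = 7, so the do-while loop
allocates sibs + 1 = 8 nodes, one for each of the 2^(9-6) = 8 parent
slots the order-9 entry occupies.  Splitting an order-3 entry to order 0
instead satisfies xa_shift + XA_CHUNK_SHIFT > order and returns before
allocating anything, since that split rearranges slots within a single
existing node; xas_split() then takes its else branch, writing the
canonical entry and the sibling entries in place.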