WSL2-Linux-Kernel/fs/fscache/io.c

327 строки
8.8 KiB
C
Исходник Обычный вид История

fscache: Provide a means to begin an operation Provide a function to begin a read operation: int fscache_begin_read_operation( struct netfs_cache_resources *cres, struct fscache_cookie *cookie) This is primarily intended to be called by network filesystems on behalf of netfslib, but may also be called to use the I/O access functions directly. It attaches the resources required by the cache to the cres struct from the supplied cookie. This holds access to the cache behind the cookie for the duration of the operation and forces cache withdrawal and cookie invalidation to perform synchronisation on the operation. cres->inval_counter is set from the cookie at this point so that it can be compared at the end of the operation. Note that this does not guarantee that the cache state is fully set up and able to perform I/O immediately; looking up and creation may be left in progress in the background. The operations intended to be called by the network filesystem, such as reading and writing, are expected to wait for the cookie to move to the correct state. This will, however, potentially sleep, waiting for a certain minimum state to be set or for operations such as invalidate to advance far enough that I/O can resume. Also provide a function for the cache to call to wait for the cache object to get to a state where it can be used for certain things: bool fscache_wait_for_operation(struct netfs_cache_resources *cres, enum fscache_want_state want_state); This looks at the cache resources provided by the begin function and waits for them to get to an appropriate state. There's a choice of wanting just some parameters (FSCACHE_WANT_PARAMS) or the ability to do I/O (FSCACHE_WANT_READ or FSCACHE_WANT_WRITE). 
Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819603692.215744.146724961588817028.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906910672.143852.13856103384424986357.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967110245.1823006.2239170567540431836.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021513617.640689.16627329360866150606.stgit@warthog.procyon.org.uk/ # v4
2021-10-20 16:06:34 +03:00
// SPDX-License-Identifier: GPL-2.0-or-later
/* Cache data I/O routines
*
* Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#define FSCACHE_DEBUG_LEVEL OPERATION
#include <linux/fscache-cache.h>
#include <linux/uio.h>
#include <linux/bvec.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include "internal.h"
/**
* fscache_wait_for_operation - Wait for an object become accessible
* @cres: The cache resources for the operation being performed
* @want_state: The minimum state the object must be at
*
* See if the target cache object is at the specified minimum state of
* accessibility yet, and if not, wait for it.
*/
bool fscache_wait_for_operation(struct netfs_cache_resources *cres,
enum fscache_want_state want_state)
{
struct fscache_cookie *cookie = fscache_cres_cookie(cres);
enum fscache_cookie_state state;
again:
if (!fscache_cache_is_live(cookie->volume->cache)) {
_leave(" [broken]");
return false;
}
state = fscache_cookie_state(cookie);
_enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_CREATING:
case FSCACHE_COOKIE_STATE_INVALIDATING:
if (want_state == FSCACHE_WANT_PARAMS)
goto ready; /* There can be no content */
fallthrough;
case FSCACHE_COOKIE_STATE_LOOKING_UP:
case FSCACHE_COOKIE_STATE_LRU_DISCARDING:
wait_var_event(&cookie->state,
fscache_cookie_state(cookie) != state);
goto again;
case FSCACHE_COOKIE_STATE_ACTIVE:
goto ready;
case FSCACHE_COOKIE_STATE_DROPPED:
case FSCACHE_COOKIE_STATE_RELINQUISHING:
default:
_leave(" [not live]");
return false;
}
ready:
if (!cres->cache_priv2)
return cookie->volume->cache->ops->begin_operation(cres, want_state);
return true;
}
EXPORT_SYMBOL(fscache_wait_for_operation);
/*
 * Begin an I/O operation on the cache, waiting till we reach the right state.
 *
 * Attaches the resources required to the operation resources record.
 *
 * @cres is initialised from @cookie and access to the cookie is obtained with
 * fscache_begin_cookie_access(), using @why as the trace reason.  The cookie
 * state is then examined under cookie->lock; if the cookie is still being
 * looked up, created, invalidated or discarded, we wait for the state to
 * change and re-examine it.
 *
 * Returns 0 on success.  Returns -ENOBUFS if cookie access could not be
 * obtained, if the cookie is in a state that cannot be used or if the cache's
 * ->begin_operation() declines; in all of those cases @cres->cache_priv is
 * left NULL.
 */
static int fscache_begin_operation(struct netfs_cache_resources *cres,
				   struct fscache_cookie *cookie,
				   enum fscache_want_state want_state,
				   enum fscache_access_trace why)
{
	enum fscache_cookie_state state;
	long timeo;
	bool once_only = false;

	cres->ops		= NULL;
	cres->cache_priv	= cookie;
	cres->cache_priv2	= NULL;
	cres->debug_id		= cookie->debug_id;
	cres->inval_counter	= cookie->inval_counter;

	if (!fscache_begin_cookie_access(cookie, why)) {
		/* No access was obtained, so don't leave the cookie attached
		 * to the resources record.  This matches what the failure path
		 * at the bottom of the function does.
		 */
		cres->cache_priv = NULL;
		return -ENOBUFS;
	}

again:
	spin_lock(&cookie->lock);

	state = fscache_cookie_state(cookie);
	_enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);

	switch (state) {
	case FSCACHE_COOKIE_STATE_LOOKING_UP:
	case FSCACHE_COOKIE_STATE_LRU_DISCARDING:
	case FSCACHE_COOKIE_STATE_INVALIDATING:
		goto wait_for_file_wrangling;
	case FSCACHE_COOKIE_STATE_CREATING:
		if (want_state == FSCACHE_WANT_PARAMS)
			goto ready; /* There can be no content */
		goto wait_for_file_wrangling;
	case FSCACHE_COOKIE_STATE_ACTIVE:
		goto ready;
	case FSCACHE_COOKIE_STATE_DROPPED:
	case FSCACHE_COOKIE_STATE_RELINQUISHING:
		WARN(1, "Can't use cookie in state %u\n", cookie->state);
		goto not_live;
	default:
		goto not_live;
	}

ready:
	spin_unlock(&cookie->lock);
	if (!cookie->volume->cache->ops->begin_operation(cres, want_state))
		goto failed;
	return 0;

wait_for_file_wrangling:
	spin_unlock(&cookie->lock);
	trace_fscache_access(cookie->debug_id, refcount_read(&cookie->ref),
			     atomic_read(&cookie->n_accesses),
			     fscache_access_io_wait);
	timeo = wait_var_event_timeout(&cookie->state,
				       fscache_cookie_state(cookie) != state, 20 * HZ);
	/* Complain about a slow state change, but only the first time round. */
	if (timeo <= 1 && !once_only) {
		pr_warn("%s: cookie state change wait timed out: cookie->state=%u state=%u",
			__func__, fscache_cookie_state(cookie), state);
		fscache_print_cookie(cookie, 'O');
		once_only = true;
	}
	goto again;

not_live:
	spin_unlock(&cookie->lock);
failed:
	/* Detach from the cookie and give back the access taken above. */
	cres->cache_priv = NULL;
	cres->ops = NULL;
	fscache_end_cookie_access(cookie, fscache_access_io_not_live);
	_leave(" = -ENOBUFS");
	return -ENOBUFS;
}
/*
 * Begin a read operation on the cache object behind a cookie.
 *
 * Hands off to fscache_begin_operation(), asking for FSCACHE_WANT_PARAMS and
 * tagging the access as fscache_access_io_read, and passes its return value
 * back to the caller.
 */
int __fscache_begin_read_operation(struct netfs_cache_resources *cres,
				   struct fscache_cookie *cookie)
{
	int ret;

	ret = fscache_begin_operation(cres, cookie, FSCACHE_WANT_PARAMS,
				      fscache_access_io_read);
	return ret;
}
EXPORT_SYMBOL(__fscache_begin_read_operation);
fscache: Implement higher-level write I/O interface Provide a higher-level function than fscache_write() to perform a write from an inode's pagecache to the cache, whilst fending off concurrent writes by means of the PG_fscache mark on a page: void fscache_write_to_cache(struct fscache_cookie *cookie, struct address_space *mapping, loff_t start, size_t len, loff_t i_size, netfs_io_terminated_t term_func, void *term_func_priv, bool caching); If caching is false, this function does nothing except call (*term_func)() if given. It assumes that, in such a case, PG_fscache will not have been set on the pages. Otherwise, if caching is true, this function requires the source pages to have had PG_fscache set on them before calling. start and len define the region of the file to be modified and i_size indicates the new file size. The source pages are extracted from the mapping. term_func and term_func_priv work as for fscache_write(). The PG_fscache marks will be cleared at the end of the operation, before term_func is called or the function otherwise returns. There is an additional helper function to clear the PG_fscache bits from a range of pages: void fscache_clear_page_bits(struct fscache_cookie *cookie, struct address_space *mapping, loff_t start, size_t len, bool caching); If caching is true, the pages to be managed are expected to be located on mapping in the range defined by start and len. If caching is false, it does nothing. 
Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819614155.215744.5528123235123721230.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906916346.143852.15632773570362489926.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967123599.1823006.12946816026724657428.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021522672.640689.4381958316198807813.stgit@warthog.procyon.org.uk/ # v4
2021-10-20 16:06:34 +03:00
nfs: Implement cache I/O by accessing the cache directly Move NFS to using fscache DIO API instead of the old upstream I/O API as that has been removed. This is a stopgap solution as the intention is that at some time in the future, the cache will move to using larger blocks and won't be able to store individual pages in order to deal with the potential for data corruption due to the backing filesystem being able to insert/remove bridging blocks of zeros into its extent list[1]. NFS then reads and writes cache pages synchronously and one page at a time. The preferred change would be to use the netfs lib, but the new I/O API can be used directly. It's just that as the cache now needs to track data for itself, caching blocks may exceed page size... This code is somewhat borrowed from my "fallback I/O" patchset[2]. Changes ======= ver #3: - Restore lost =n fallback for nfs_fscache_release_page()[2]. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Dave Wysochanski <dwysocha@redhat.com> Acked-by: Jeff Layton <jlayton@kernel.org> cc: Trond Myklebust <trond.myklebust@hammerspace.com> cc: Anna Schumaker <anna.schumaker@netapp.com> cc: linux-nfs@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YO17ZNOcq+9PajfQ@mit.edu [1] Link: https://lore.kernel.org/r/202112100957.2oEDT20W-lkp@intel.com/ [2] Link: https://lore.kernel.org/r/163189108292.2509237.12615909591150927232.stgit@warthog.procyon.org.uk/ [2] Link: https://lore.kernel.org/r/163906981318.143852.17220018647843475985.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967184451.1823006.6450645559828329590.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021577632.640689.11069627070150063812.stgit@warthog.procyon.org.uk/ # v4
2021-08-27 17:19:34 +03:00
/*
 * Begin a write operation on the cache object behind a cookie.
 *
 * Hands off to fscache_begin_operation(), asking for FSCACHE_WANT_PARAMS and
 * tagging the access as fscache_access_io_write, and passes its return value
 * back to the caller.
 */
int __fscache_begin_write_operation(struct netfs_cache_resources *cres,
				    struct fscache_cookie *cookie)
{
	int ret;

	ret = fscache_begin_operation(cres, cookie, FSCACHE_WANT_PARAMS,
				      fscache_access_io_write);
	return ret;
}
EXPORT_SYMBOL(__fscache_begin_write_operation);
vfs, fscache: Implement pinning of cache usage for writeback Cachefiles has a problem in that it needs to keep the backing file for a cookie open whilst there are local modifications pending that need to be written to it. However, we don't want to keep the file open indefinitely, as that causes EMFILE/ENFILE/ENOMEM problems. Reopening the cache file, however, is a problem if this is being done due to writeback triggered by exit(). Some filesystems will oops if we try to open a file in that context because they want to access current->fs or other resources that have already been dismantled. To get around this, I added the following: (1) An inode flag, I_PINNING_FSCACHE_WB, to be set on a network filesystem inode to indicate that we have a usage count on the cookie caching that inode. (2) A flag in struct writeback_control, unpinned_fscache_wb, that is set when __writeback_single_inode() clears the last dirty page from i_pages - at which point it clears I_PINNING_FSCACHE_WB and sets this flag. This has to be done here so that clearing I_PINNING_FSCACHE_WB can be done atomically with the check of PAGECACHE_TAG_DIRTY that clears I_DIRTY_PAGES. (3) A function, fscache_set_page_dirty(), which if it is not set, sets I_PINNING_FSCACHE_WB and calls fscache_use_cookie() to pin the cache resources. (4) A function, fscache_unpin_writeback(), to be called by ->write_inode() to unuse the cookie. (5) A function, fscache_clear_inode_writeback(), to be called when the inode is evicted, before clear_inode() is called. This cleans up any lingering I_PINNING_FSCACHE_WB. The network filesystem can then use these tools to make sure that fscache_write_to_cache() can write locally modified data to the cache as well as to the server. 
For the future, I'm working on write helpers for netfs lib that should allow this facility to be removed by keeping track of the dirty regions separately - but that's incomplete at the moment and is also going to be affected by folios, one way or another, since it deals with pages Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819615157.215744.17623791756928043114.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906917856.143852.8224898306177154573.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967124567.1823006.14188359004568060298.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021524705.640689.17824932021727663017.stgit@warthog.procyon.org.uk/ # v4
2021-10-21 01:50:01 +03:00
/**
 * fscache_set_page_dirty - Mark page dirty and pin a cache object for writeback
 * @page: The page being dirtied
 * @cookie: The cookie referring to the cache object
 *
 * Mark @page dirty and, if that is the first thing to require it, flag the
 * owning inode with I_PINNING_FSCACHE_WB and take a use on @cookie so that
 * writeback can later write to the cache object.  This is intended to be
 * called from the filesystem's ->set_page_dirty() method.
 *
 * Returns 1 if PG_dirty was set on the page, 0 otherwise.
 */
int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie)
{
	struct inode *host = page->mapping->host;
	bool pin_now;

	_enter("");

	if (!__set_page_dirty_nobuffers(page))
		return 0;
	if (!fscache_cookie_valid(cookie))
		return 1;

	/* Quick unlocked look: nothing more to do if the flag is already set. */
	if (host->i_state & I_PINNING_FSCACHE_WB)
		return 1;

	/* Look again under i_lock so that only one caller sets the flag. */
	spin_lock(&host->i_lock);
	pin_now = !(host->i_state & I_PINNING_FSCACHE_WB);
	if (pin_now)
		host->i_state |= I_PINNING_FSCACHE_WB;
	spin_unlock(&host->i_lock);

	/* Take the use on the cookie outside of the spinlock. */
	if (pin_now)
		fscache_use_cookie(cookie, true);
	return 1;
}
EXPORT_SYMBOL(fscache_set_page_dirty);
fscache: Implement higher-level write I/O interface Provide a higher-level function than fscache_write() to perform a write from an inode's pagecache to the cache, whilst fending off concurrent writes by means of the PG_fscache mark on a page: void fscache_write_to_cache(struct fscache_cookie *cookie, struct address_space *mapping, loff_t start, size_t len, loff_t i_size, netfs_io_terminated_t term_func, void *term_func_priv, bool caching); If caching is false, this function does nothing except call (*term_func)() if given. It assumes that, in such a case, PG_fscache will not have been set on the pages. Otherwise, if caching is true, this function requires the source pages to have had PG_fscache set on them before calling. start and len define the region of the file to be modified and i_size indicates the new file size. The source pages are extracted from the mapping. term_func and term_func_priv work as for fscache_write(). The PG_fscache marks will be cleared at the end of the operation, before term_func is called or the function otherwise returns. There is an additional helper function to clear the PG_fscache bits from a range of pages: void fscache_clear_page_bits(struct fscache_cookie *cookie, struct address_space *mapping, loff_t start, size_t len, bool caching); If caching is true, the pages to be managed are expected to be located on mapping in the range defined by start and len. If caching is false, it does nothing. 
Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819614155.215744.5528123235123721230.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906916346.143852.15632773570362489926.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967123599.1823006.12946816026724657428.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021522672.640689.4381958316198807813.stgit@warthog.procyon.org.uk/ # v4
2021-10-20 16:06:34 +03:00
/*
 * Record of a write to the cache started by __fscache_write_to_cache().  It is
 * passed as the private data to fscache_wreq_done(), which frees it.
 */
struct fscache_write_request {
struct netfs_cache_resources cache_resources; /* Resources set up by fscache_begin_operation() */
struct address_space *mapping; /* Mapping the caller supplied as the source of the pages */
loff_t start; /* Start of the range as supplied by the caller */
size_t len; /* Length of the range as supplied by the caller */
bool set_bits; /* Caller's "cond" flag; handed to fscache_clear_page_bits() on completion */
netfs_io_terminated_t term_func; /* Optional completion callback (may be NULL) */
void *term_func_priv; /* Private data passed to term_func */
};
/*
 * Call end_page_fscache() on every page found in @mapping between the page
 * containing byte @start and the page containing byte @start + @len - 1.
 * Nothing is done if @len is zero.
 */
void __fscache_clear_page_bits(struct address_space *mapping,
			       loff_t start, size_t len)
{
	if (len) {
		pgoff_t head_index = start / PAGE_SIZE;
		pgoff_t tail_index = (start + len - 1) / PAGE_SIZE;
		struct page *pg;
		XA_STATE(xas, &mapping->i_pages, head_index);

		/* The xarray is walked under the RCU read lock. */
		rcu_read_lock();
		xas_for_each(&xas, pg, tail_index)
			end_page_fscache(pg);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(__fscache_clear_page_bits);
/*
 * Completion handler for a write to the cache.
 *
 * @priv is the fscache_write_request.  The page bits over the request's range
 * are cleared, the caller's termination function (if there is one) is told
 * the outcome, the cache operation is ended and the request is freed - in
 * that order.
 */
static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
			      bool was_async)
{
	struct fscache_write_request *req = priv;
	struct netfs_cache_resources *res = &req->cache_resources;
	netfs_io_terminated_t notify = req->term_func;

	fscache_clear_page_bits(fscache_cres_cookie(res), req->mapping,
				req->start, req->len, req->set_bits);

	if (notify)
		notify(req->term_func_priv, transferred_or_error, was_async);

	fscache_end_operation(res);
	kfree(req);
}
/*
 * Write a range of an inode's pagecache to the cache.
 *
 * @cookie: The cookie for the cache object to write to
 * @mapping: The mapping holding the source pages
 * @start, @len: The region of the file to write
 * @i_size: The file size passed to the cache's ->prepare_write()
 * @term_func: Optional function to call on completion or failure
 * @term_func_priv: Private data for @term_func
 * @cond: Passed through to fscache_clear_page_bits()
 *
 * A write request is allocated, a write operation is begun on the cookie and
 * the cache is asked to prepare for the write (which may adjust the region
 * actually written).  The data is then handed to fscache_write() with
 * fscache_wreq_done() as the completion handler.
 *
 * If nothing can be written (zero length, allocation failure, the operation
 * cannot be begun or ->prepare_write() fails), the page bits over the range
 * are cleared and @term_func, if given, is called with the error.
 */
void __fscache_write_to_cache(struct fscache_cookie *cookie,
			      struct address_space *mapping,
			      loff_t start, size_t len, loff_t i_size,
			      netfs_io_terminated_t term_func,
			      void *term_func_priv,
			      bool cond)
{
	struct fscache_write_request *wreq;
	struct netfs_cache_resources *cres;
	struct iov_iter iter;
	int ret = -ENOBUFS;

	if (len == 0)
		goto abandon;

	_enter("%llx,%zx", start, len);

	wreq = kzalloc(sizeof(*wreq), GFP_NOFS);
	if (!wreq)
		goto abandon;

	/* Remember the range as given; ->prepare_write() may alter the local
	 * start and len, but completion clears the bits over what was asked.
	 */
	wreq->mapping		= mapping;
	wreq->start		= start;
	wreq->len		= len;
	wreq->set_bits		= cond;
	wreq->term_func		= term_func;
	wreq->term_func_priv	= term_func_priv;

	cres = &wreq->cache_resources;
	if (fscache_begin_operation(cres, cookie, FSCACHE_WANT_WRITE,
				    fscache_access_io_write) < 0)
		goto abandon_free;

	ret = cres->ops->prepare_write(cres, &start, &len, i_size, false);
	if (ret < 0)
		goto abandon_end;

	/* TODO: Consider clearing page bits now for space the write isn't
	 * covering.  This is more complicated than it appears when THPs are
	 * taken into account.
	 */

	iov_iter_xarray(&iter, WRITE, &mapping->i_pages, start, len);
	fscache_write(cres, start, &iter, fscache_wreq_done, wreq);
	return;

abandon_end:
	/* The operation was begun, so let the completion handler clear the
	 * bits, notify the caller, end the operation and free the request.
	 */
	fscache_wreq_done(wreq, ret, false);
	return;
abandon_free:
	kfree(wreq);
abandon:
	fscache_clear_page_bits(cookie, mapping, start, len, cond);
	if (term_func)
		term_func(term_func_priv, ret, false);
}
EXPORT_SYMBOL(__fscache_write_to_cache);
/*
 * Change the size of a backing object.
 *
 * A write operation is begun on the cookie; if that succeeds, the cookie is
 * flagged as needing an update and the cache is asked to resize the object to
 * @new_size before the operation is ended.  If the operation can't be begun,
 * only the "null resize" statistic is bumped.
 */
void __fscache_resize_cookie(struct fscache_cookie *cookie, loff_t new_size)
{
	struct netfs_cache_resources cres;
	int err;

	trace_fscache_resize(cookie, new_size);

	err = fscache_begin_operation(&cres, cookie, FSCACHE_WANT_WRITE,
				      fscache_access_io_resize);
	if (err != 0) {
		fscache_stat(&fscache_n_resizes_null);
		return;
	}

	fscache_stat(&fscache_n_resizes);
	set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &cookie->flags);

	/* We cannot defer a resize as we need to do it inside the
	 * netfs's inode lock so that we're serialised with respect to
	 * writes.
	 */
	cookie->volume->cache->ops->resize_cookie(&cres, new_size);
	fscache_end_operation(&cres);
}
EXPORT_SYMBOL(__fscache_resize_cookie);