NFS: Fix asynchronous read error handling

We must always call ->read_done() before we truncate the page data, or decide to flag an error. The reasons are that in NFSv2, ->read_done() is where the eof flag gets set. in NFSv3/v4 ->read_done() handles EJUKEBOX-type errors, and v4 state recovery. However, we need to mark the pages as uptodate before we deal with short read errors, since we may need to modify the nfs_read_data arguments. We therefore split the current nfs_readpage_result() into two parts: nfs_readpage_result(), which calls ->read_done() etc, and nfs_readpage_retry(), which subsequently handles short reads. Note: Removing the code that retries in case of a short read also fixes a bug in nfs_direct_read_result(), which used to return a corrupted number of bytes. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2006-11-14 16:12:23 -05:00 · 2006-11-14 16:12:23 -05:00 · 0b67130149
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@ -460,6 +460,55 @@ nfs_pagein_list(struct list_head *head, int rpages)
 	return error;
 }

+/*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ */
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+{
+	int status;
+
+	dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid,
+			task->tk_status);
+
+	status = NFS_PROTO(data->inode)->read_done(task, data);
+	if (status != 0)
+		return status;
+
+	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+
+	if (task->tk_status == -ESTALE) {
+		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+		nfs_mark_for_revalidate(data->inode);
+	}
+	spin_lock(&data->inode->i_lock);
+	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+	spin_unlock(&data->inode->i_lock);
+	return 0;
+}
+
+static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+{
+	struct nfs_readargs *argp = &data->args;
+	struct nfs_readres *resp = &data->res;
+
+	if (resp->eof || resp->count == argp->count)
+		return 0;
+
+	/* This is a short read! */
+	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+	/* Has the server at least made some progress? */
+	if (resp->count == 0)
+		return 0;
+
+	/* Yes, so retry the read at the end of the data */
+	argp->offset += resp->count;
+	argp->pgbase += resp->count;
+	argp->count -= resp->count;
+	rpc_restart_call(task);
+	return -EAGAIN;
+}
+
 /*
 * Handle a read reply that fills part of a page.
 */
@ -469,12 +518,16 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 	struct nfs_page *req = data->req;
 	struct page *page = req->wb_page;
 
-	if (likely(task->tk_status >= 0))
-		nfs_readpage_truncate_uninitialised_page(data);
-	else
-		SetPageError(page);
 	if (nfs_readpage_result(task, data) != 0)
 		return;
+
+	if (likely(task->tk_status >= 0)) {
+		nfs_readpage_truncate_uninitialised_page(data);
+		if (nfs_readpage_retry(task, data) != 0)
+			return;
+	}
+	if (unlikely(task->tk_status < 0))
+		SetPageError(page);
 	if (atomic_dec_and_test(&req->wb_complete)) {
 		if (!PageError(page))
 			SetPageUptodate(page);
@ -502,25 +555,13 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
 	count += base;
 	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
 		SetPageUptodate(*pages);
-	if (count != 0)
+	if (count == 0)
+		return;
+	/* Was this a short read? */
+	if (data->res.eof || data->res.count == data->args.count)
 		SetPageUptodate(*pages);
 }

-static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
-{
-	unsigned int count = data->args.count;
-	unsigned int base = data->args.pgbase;
-	struct page **pages;
-
-	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-	base &= ~PAGE_CACHE_MASK;
-	count += base;
-	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
-		SetPageError(*pages);
-	if (count != 0)
-		SetPageError(*pages);
-}
-
 /*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
@ -529,19 +570,20 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;

+	if (nfs_readpage_result(task, data) != 0)
+		return;
 	/*
-	 * Note: nfs_readpage_result may change the values of
+	 * Note: nfs_readpage_retry may change the values of
 	 * data->args. In the multi-page case, we therefore need
-	 * to ensure that we call the next nfs_readpage_set_page_uptodate()
-	 * first in the multi-page case.
+	 * to ensure that we call nfs_readpage_set_pages_uptodate()
+	 * first.
 	 */
 	if (likely(task->tk_status >= 0)) {
 		nfs_readpage_truncate_uninitialised_page(data);
 		nfs_readpage_set_pages_uptodate(data);
-	} else
-		nfs_readpage_set_pages_error(data);
-	if (nfs_readpage_result(task, data) != 0)
-		return;
+		if (nfs_readpage_retry(task, data) != 0)
+			return;
+	}
 	while (!list_empty(&data->pages)) {
 		struct nfs_page *req = nfs_list_entry(data->pages.next);

@ -555,50 +597,6 @@ static const struct rpc_call_ops nfs_read_full_ops = {
 	.rpc_release = nfs_readdata_release,
 };

-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
-{
-	struct nfs_readargs *argp = &data->args;
-	struct nfs_readres *resp = &data->res;
-	int status;
-
-	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-		task->tk_pid, task->tk_status);
-
-	status = NFS_PROTO(data->inode)->read_done(task, data);
-	if (status != 0)
-		return status;
-
-	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
-
-	if (task->tk_status < 0) {
-		if (task->tk_status == -ESTALE) {
-			set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
-			nfs_mark_for_revalidate(data->inode);
-		}
-	} else if (resp->count < argp->count && !resp->eof) {
-		/* This is a short read! */
-		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
-		/* Has the server at least made some progress? */
-		if (resp->count != 0) {
-			/* Yes, so retry the read at the end of the data */
-			argp->offset += resp->count;
-			argp->pgbase += resp->count;
-			argp->count -= resp->count;
-			rpc_restart_call(task);
-			return -EAGAIN;
-		}
-		task->tk_status = -EIO;
-	}
-	spin_lock(&data->inode->i_lock);
-	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
-	spin_unlock(&data->inode->i_lock);
-	return 0;
-}
-
 /*
 * Read a page over NFS.
 * We read the page synchronously in the following case: