From d0687be7c7ae21461da4438d5fd059b48487bfe1 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 3 Apr 2009 15:18:24 -0500 Subject: [PATCH 1/6] svcrdma: Fix dma map direction for rdma read targets The nfs server rdma transport was mapping rdma read target pages for TO_DEVICE instead of FROM_DEVICE. This causes data corruption on non cache-coherent systems if frmrs are used. Signed-off-by: Steve Wise Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 629a28764da9..42a6f9f20285 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -265,7 +265,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, frmr->page_list->page_list[page_no] = ib_dma_map_single(xprt->sc_cm_id->device, page_address(rqstp->rq_arg.pages[page_no]), - PAGE_SIZE, DMA_TO_DEVICE); + PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; From 21515e46bc6a6279dd13f6c01898ada9720100a3 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 29 Apr 2009 14:14:00 -0500 Subject: [PATCH 2/6] svcrdma: clean up error paths. These fixes resolved crashes due to resource leak BUG_ON checks. The resource leaks were detected by introducing asynchronous transport errors. Signed-off-by: Steve Wise Signed-off-by: Tom Tucker Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 3 +++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 6c26a675435a..8b510c5e8777 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -183,6 +183,7 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, fatal_err: printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); + vec->frmr = NULL; svc_rdma_put_frmr(xprt, frmr); return -EIO; } @@ -516,6 +517,7 @@ static int send_reply(struct svcxprt_rdma *rdma, "svcrdma: could not post a receive buffer, err=%d." "Closing transport %p.\n", ret, rdma); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } @@ -606,6 +608,7 @@ static int send_reply(struct svcxprt_rdma *rdma, return 0; err: + svc_rdma_unmap_dma(ctxt); svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 1); return -EIO; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 3d810e7df3fb..4b0c2fa15e0b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -520,8 +520,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) svc_xprt_get(&xprt->sc_xprt); ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); if (ret) { - svc_xprt_put(&xprt->sc_xprt); + svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); + svc_xprt_put(&xprt->sc_xprt); } return ret; From ccecee1e5e42981f5eb37f4411e8552b9db04d30 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 1 May 2009 11:33:53 -0400 Subject: [PATCH 3/6] nfsd41: slots are freed with session The session and slots are allocated all in one piece. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c65a27b76a9d..3b711f5147a7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -580,7 +580,6 @@ free_session(struct kref *kref) struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; nfsd4_release_respages(e->ce_respages, e->ce_resused); } - kfree(ses->se_slots); kfree(ses); } From b2c0cea6b1cb210e962f07047df602875564069e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 5 May 2009 19:04:29 -0400 Subject: [PATCH 4/6] nfsd4: check for negative dentry before use in nfsv4 readdir After 2f9092e1020246168b1309b35e085ecd7ff9ff72 "Fix i_mutex vs. readdir handling in nfsd" (and 14f7dd63 "Copy XFS readdir hack into nfsd code"), an entry may be removed between the first mutex_unlock and the second mutex_lock. In this case, lookup_one_len() will return a negative dentry. Check for this case to avoid a NULL dereference. Signed-off-by: J. Bruce Fields Reviewed-by: J. R. Okajima Cc: stable@kernel.org --- fs/nfsd/nfs4xdr.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b820c311931c..b73549d293be 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2214,6 +2214,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); if (IS_ERR(dentry)) return nfserrno(PTR_ERR(dentry)); + if (!dentry->d_inode) { + /* + * nfsd_buffered_readdir drops the i_mutex between + * readdir and calling this callback, leaving a window + * where this directory entry could have gone away. + */ + dput(dentry); + return nfserr_noent; + } exp_get(exp); /* @@ -2276,6 +2285,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); int buflen; __be32 *p = cd->buffer; + __be32 *cookiep; __be32 nfserr = nfserr_toosmall; /* In nfsv4, "." and ".." never make it onto the wire.. */ @@ -2292,7 +2302,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; *p++ = xdr_one; /* mark entry present */ - cd->offset = p; /* remember pointer */ + cookiep = p; p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ @@ -2306,6 +2316,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; case nfserr_dropit: goto fail; + case nfserr_noent: + goto skip_entry; default: /* * If the client requested the RDATTR_ERROR attribute, @@ -2324,6 +2336,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, } cd->buflen -= (p - cd->buffer); cd->buffer = p; + cd->offset = cookiep; +skip_entry: cd->common.err = nfs_ok; return 0; fail: From 89996df4b5b1a09c279f50b3fd03aa9df735f5cb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 6 May 2009 16:32:54 -0400 Subject: [PATCH 5/6] lockd: fix list corruption on lockd restart If lockd is signalled soon enough after restart then locks_start_grace() will try to re-add an entry to a list and trigger a lock corruption warning. Thanks to Wang Chen for the problem report and diagnosis. WARNING: at lib/list_debug.c:26 __list_add+0x27/0x5c() ... list_add corruption. next->prev should be prev (ef8fe958), but was ef8ff128. (next=ef8ff128). ... Pid: 23062, comm: lockd Tainted: G W 2.6.30-rc2 #3 Call Trace: [] warn_slowpath+0x71/0xa0 [] ? update_curr+0x11d/0x125 [] ? trace_hardirqs_on_caller+0x18/0x150 [] ? trace_hardirqs_on+0xb/0xd [] ? _raw_spin_lock+0x53/0xfa [] __list_add+0x27/0x5c [] locks_start_grace+0x22/0x30 [lockd] [] set_grace_period+0x39/0x53 [lockd] [] ? lock_kernel+0x1c/0x28 [] lockd+0x64/0x164 [lockd] [] ? trace_hardirqs_on_caller+0x18/0x150 [] ? complete+0x34/0x3e [] ? lockd+0x0/0x164 [lockd] [] ? lockd+0x0/0x164 [lockd] [] kthread+0x45/0x6b [] ? kthread+0x0/0x6b [] kernel_thread_helper+0x7/0x10 Reported-by: Wang Chen Signed-off-by: J. Bruce Fields Cc: stable@kernel.org --- fs/lockd/svc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index abf83881f68a..1a54ae14a192 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -104,6 +104,16 @@ static void set_grace_period(void) schedule_delayed_work(&grace_period_end, grace_period); } +static void restart_grace(void) +{ + if (nlmsvc_ops) { + cancel_delayed_work_sync(&grace_period_end); + locks_end_grace(&lockd_manager); + nlmsvc_invalidate_all(); + set_grace_period(); + } +} + /* * This is the lockd kernel thread */ @@ -149,10 +159,7 @@ lockd(void *vrqstp) if (signalled()) { flush_signals(current); - if (nlmsvc_ops) { - nlmsvc_invalidate_all(); - set_grace_period(); - } + restart_grace(); continue; } From 8daed1e549b55827758b3af7b8132a73fc51526f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 11 May 2009 16:10:19 -0400 Subject: [PATCH 6/6] nfsd: silence lockdep warning Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 5275097a7565..b5348405046b 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -229,7 +229,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) goto out; status = vfs_readdir(filp, nfsd4_build_namelist, &names); fput(filp); - mutex_lock(&dir->d_inode->i_mutex); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); while (!list_empty(&names)) { entry = list_entry(names.next, struct name_list, list); @@ -264,7 +264,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen) dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); - mutex_lock(&rec_dir.dentry->d_inode->i_mutex); + mutex_lock_nested(&rec_dir.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, rec_dir.dentry, namlen); if (IS_ERR(dentry)) { status = PTR_ERR(dentry);