From bddfdbcddbe267519cd36aeb115fdf8620980111 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Oct 2020 15:53:42 -0400 Subject: [PATCH 01/70] NFSD: Extract the svcxdr_init_encode() helper NFSD initializes an encode xdr_stream only after the RPC layer has already inserted the RPC Reply header. Thus it behaves differently than xdr_init_encode does, which assumes the passed-in xdr_buf is entirely devoid of content. nfs4proc.c has this server-side stream initialization helper, but it is visible only to the NFSv4 code. Move this helper to a place that can be accessed by NFSv2 and NFSv3 server XDR functions. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 31 +++-------- fs/nfsd/nfs4state.c | 6 +- fs/nfsd/nfs4xdr.c | 110 ++++++++++++++++++------------------- fs/nfsd/nfssvc.c | 4 +- fs/nfsd/xdr4.h | 2 +- include/linux/sunrpc/svc.h | 25 +++++++++ 6 files changed, 94 insertions(+), 84 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index dd9f38d072dd..c57cf09a8b60 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2262,25 +2262,6 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) return !(nextd->op_flags & OP_HANDLES_WRONGSEC); } -static void svcxdr_init_encode(struct svc_rqst *rqstp, - struct nfsd4_compoundres *resp) -{ - struct xdr_stream *xdr = &resp->xdr; - struct xdr_buf *buf = &rqstp->rq_res; - struct kvec *head = buf->head; - - xdr->buf = buf; - xdr->iov = head; - xdr->p = head->iov_base + head->iov_len; - xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; - /* Tail and page_len should be zero at this point: */ - buf->len = buf->head[0].iov_len; - xdr_reset_scratch_buffer(xdr); - xdr->page_ptr = buf->pages - 1; - buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) - - rqstp->rq_auth_slack; -} - #ifdef CONFIG_NFSD_V4_2_INTER_SSC static void check_if_stalefh_allowed(struct nfsd4_compoundargs *args) @@ -2335,10 +2316,14 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); __be32 status; - svcxdr_init_encode(rqstp, resp); - resp->tagp = resp->xdr.p; + resp->xdr = &rqstp->rq_res_stream; + + /* reserve space for: NFS status code */ + xdr_reserve_space(resp->xdr, XDR_UNIT); + + resp->tagp = resp->xdr->p; /* reserve space for: taglen, tag, and opcnt */ - xdr_reserve_space(&resp->xdr, 8 + args->taglen); + xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; resp->rqstp = rqstp; @@ -2444,7 +2429,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) encode_op: if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; - nfsd4_encode_replay(&resp->xdr, op); + nfsd4_encode_replay(resp->xdr, op); status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 97447a64bad0..3290d0a7bb95 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2903,7 +2903,7 @@ out_err: static void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { - struct xdr_buf *buf = resp->xdr.buf; + struct xdr_buf *buf = resp->xdr->buf; struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; @@ -2973,7 +2973,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; __be32 status; @@ -3708,7 +3708,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfsd4_sequence *seq = &u->sequence; struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfsd4_session *session; struct nfs4_client *clp; struct nfsd4_slot *slot; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index eaaa1605b5b5..e0f06d3cbd44 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3581,7 +3581,7 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 8); @@ -3594,7 +3594,7 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); @@ -3611,7 +3611,7 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &close->cl_stateid); } @@ -3620,7 +3620,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); @@ -3634,7 +3634,7 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -3649,7 +3649,7 @@ static __be32 nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) { struct svc_fh *fhp = getattr->ga_fhp; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, getattr->ga_bmval, resp->rqstp, 0); @@ -3658,7 +3658,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; __be32 *p; @@ -3713,7 +3713,7 @@ again: static __be32 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; if (!nfserr) nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); @@ -3726,7 +3726,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo static __be32 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; if (nfserr == nfserr_denied) nfsd4_encode_lock_denied(xdr, &lockt->lt_denied); @@ -3736,7 +3736,7 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &locku->lu_stateid); } @@ -3745,7 +3745,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -3759,7 +3759,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); @@ -3853,7 +3853,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } @@ -3861,7 +3861,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &od->od_stateid); } @@ -3871,7 +3871,7 @@ static __be32 nfsd4_encode_splice_read( struct nfsd4_read *read, struct file *file, unsigned long maxcount) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct xdr_buf *buf = xdr->buf; int status, space_left; u32 eof; @@ -3937,7 +3937,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct nfsd4_read *read, struct file *file, unsigned long maxcount) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; u32 eof; int starting_len = xdr->buf->len - 8; __be32 nfserr; @@ -3976,7 +3976,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { unsigned long maxcount; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct file *file; int starting_len = xdr->buf->len; __be32 *p; @@ -3990,7 +3990,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); return nfserr_resource; } - if (resp->xdr.buf->page_len && + if (resp->xdr->buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { WARN_ON_ONCE(1); return nfserr_resource; @@ -4020,7 +4020,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd int maxcount; __be32 wire_count; int zero = 0; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; int length_offset = xdr->buf->len; int status; __be32 *p; @@ -4072,7 +4072,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 int bytes_left; loff_t offset; __be64 wire_offset; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; int starting_len = xdr->buf->len; __be32 *p; @@ -4083,8 +4083,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ *p++ = cpu_to_be32(0); *p++ = cpu_to_be32(0); - resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) - - (char *)resp->xdr.buf->head[0].iov_base; + xdr->buf->head[0].iov_len = (char *)xdr->p - + (char *)xdr->buf->head[0].iov_base; /* * Number of bytes left for directory entries allowing for the @@ -4159,7 +4159,7 @@ err_no_verf: static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -4172,7 +4172,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 40); @@ -4255,7 +4255,7 @@ static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo *secinfo) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); } @@ -4264,7 +4264,7 @@ static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo_no_name *secinfo) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); } @@ -4276,7 +4276,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 16); @@ -4300,7 +4300,7 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; if (!nfserr) { @@ -4324,7 +4324,7 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 16); @@ -4341,7 +4341,7 @@ static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_exchange_id *exid) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; char *major_id; char *server_scope; @@ -4419,7 +4419,7 @@ static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create_session *sess) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 24); @@ -4472,7 +4472,7 @@ static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_sequence *seq) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); @@ -4495,7 +4495,7 @@ static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid *test_stateid) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; @@ -4516,7 +4516,7 @@ static __be32 nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getdeviceinfo *gdev) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; __be32 *p; @@ -4572,7 +4572,7 @@ static __be32 nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutget *lgp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; __be32 *p; @@ -4599,7 +4599,7 @@ static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutcommit *lcp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 4); @@ -4620,7 +4620,7 @@ static __be32 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutreturn *lrp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 4); @@ -4638,7 +4638,7 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write, bool sync) { __be32 *p; - p = xdr_reserve_space(&resp->xdr, 4); + p = xdr_reserve_space(resp->xdr, 4); if (!p) return nfserr_resource; @@ -4647,11 +4647,11 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, else { __be32 nfserr; *p++ = cpu_to_be32(1); - nfserr = nfsd4_encode_stateid(&resp->xdr, &write->cb_stateid); + nfserr = nfsd4_encode_stateid(resp->xdr, &write->cb_stateid); if (nfserr) return nfserr; } - p = xdr_reserve_space(&resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); + p = xdr_reserve_space(resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; @@ -4665,7 +4665,7 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, static __be32 nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfs42_netaddr *addr; __be32 *p; @@ -4713,7 +4713,7 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) return nfserr; - p = xdr_reserve_space(&resp->xdr, 4 + 4); + p = xdr_reserve_space(resp->xdr, 4 + 4); *p++ = xdr_one; /* cr_consecutive */ *p++ = cpu_to_be32(copy->cp_synchronous); return 0; @@ -4723,7 +4723,7 @@ static __be32 nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_offload_status *os) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 8 + 4); @@ -4740,7 +4740,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, unsigned long *maxcount, u32 *eof, loff_t *pos) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct file *file = read->rd_nf->nf_file; int starting_len = xdr->buf->len; loff_t hole_pos; @@ -4799,7 +4799,7 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, count = data_pos - read->rd_offset; /* Content type, offset, byte count */ - p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8); + p = xdr_reserve_space(resp->xdr, 4 + 8 + 8); if (!p) return nfserr_resource; @@ -4817,7 +4817,7 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { unsigned long maxcount, count; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct file *file; int starting_len = xdr->buf->len; int last_segment = xdr->buf->len; @@ -4888,7 +4888,7 @@ static __be32 nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_copy_notify *cn) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; if (nfserr) @@ -4924,7 +4924,7 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, { __be32 *p; - p = xdr_reserve_space(&resp->xdr, 4 + 8); + p = xdr_reserve_space(resp->xdr, 4 + 8); *p++ = cpu_to_be32(seek->seek_eof); p = xdr_encode_hyper(p, seek->seek_pos); @@ -4985,7 +4985,7 @@ static __be32 nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getxattr *getxattr) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p, err; p = xdr_reserve_space(xdr, 4); @@ -5009,7 +5009,7 @@ static __be32 nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setxattr *setxattr) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -5050,7 +5050,7 @@ static __be32 nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_listxattrs *listxattrs) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; u32 cookie_offset, count_offset, eof; u32 left, xdrleft, slen, count; u32 xdrlen, offset; @@ -5161,7 +5161,7 @@ static __be32 nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_removexattr *removexattr) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -5301,7 +5301,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfs4_stateowner *so = resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; const struct nfsd4_operation *opdesc = op->opdesc; @@ -5430,14 +5430,14 @@ int nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_buf *buf = resp->xdr.buf; + struct xdr_buf *buf = resp->xdr->buf; WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len); *p = resp->cstate.status; - rqstp->rq_next_page = resp->xdr.page_ptr + 1; + rqstp->rq_next_page = resp->xdr->page_ptr + 1; p = resp->tagp; *p++ = htonl(resp->taglen); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 6de406322106..d909e4956244 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -997,7 +997,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) * NFSv4 does some encoding while processing */ p = resv->iov_base + resv->iov_len; - resv->iov_len += sizeof(__be32); + svcxdr_init_encode(rqstp); *statp = proc->pc_func(rqstp); if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) @@ -1052,7 +1052,7 @@ int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) */ int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) { - return xdr_ressize_check(rqstp, p); + return 1; } int nfsd_pool_stats_open(struct inode *inode, struct file *file) diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index c300885ae75d..fe540a3415c6 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -698,7 +698,7 @@ struct nfsd4_compoundargs { struct nfsd4_compoundres { /* scratch variables for XDR encode */ - struct xdr_stream xdr; + struct xdr_stream *xdr; struct svc_rqst * rqstp; u32 taglen; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 31ee3b6047c3..e91d51ea028b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -248,6 +248,7 @@ struct svc_rqst { size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; struct xdr_stream rq_arg_stream; + struct xdr_stream rq_res_stream; struct page *rq_scratch_page; struct xdr_buf rq_res; struct page *rq_pages[RPCSVC_MAXPAGES + 1]; @@ -574,4 +575,28 @@ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); } +/** + * svcxdr_init_encode - Prepare an xdr_stream for svc Reply encoding + * @rqstp: controlling server RPC transaction context + * + */ +static inline void svcxdr_init_encode(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *resv = buf->head; + + xdr_reset_scratch_buffer(xdr); + + xdr->buf = buf; + xdr->iov = resv; + xdr->p = resv->iov_base + resv->iov_len; + xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + buf->len = resv->iov_len; + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages); + buf->buflen -= rqstp->rq_auth_slack; + xdr->rqst = NULL; +} + #endif /* SUNRPC_SVC_H */ From 2c42f804d30f6a8d86665eca84071b316821ea08 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Oct 2020 11:58:41 -0400 Subject: [PATCH 02/70] NFSD: Update the GETATTR3res encoder to use struct xdr_stream As an additional clean up, some renaming is done to more closely reflect the data type and variable names used in the NFSv3 XDR definition provided in RFC 1813. "attrstat" is an NFSv2 thingie. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/nfs3xdr.c | 95 ++++++++++++++++++++++++++++++++++++++++++---- fs/nfsd/nfsfh.c | 2 +- fs/nfsd/nfsfh.h | 2 +- fs/nfsd/xdr3.h | 2 +- 5 files changed, 91 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 8675851199f8..76a84481d526 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -736,7 +736,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { [NFS3PROC_GETATTR] = { .pc_func = nfsd3_proc_getattr, .pc_decode = nfs3svc_decode_fhandleargs, - .pc_encode = nfs3svc_encode_attrstatres, + .pc_encode = nfs3svc_encode_getattrres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd3_attrstatres), diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 9d9a01ce0b27..75739861d235 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -20,7 +20,7 @@ /* * Mapping of S_IF* types to NFS file types */ -static u32 nfs3_ftypes[] = { +static const u32 nfs3_ftypes[] = { NF3NON, NF3FIFO, NF3CHR, NF3BAD, NF3DIR, NF3BAD, NF3BLK, NF3BAD, NF3REG, NF3BAD, NF3LNK, NF3BAD, @@ -39,6 +39,15 @@ encode_time3(__be32 *p, struct timespec64 *time) return p; } +static __be32 * +encode_nfstime3(__be32 *p, const struct timespec64 *time) +{ + *p++ = cpu_to_be32((u32)time->tv_sec); + *p++ = cpu_to_be32(time->tv_nsec); + + return p; +} + static bool svcxdr_decode_nfstime3(struct xdr_stream *xdr, struct timespec64 *timep) { @@ -82,6 +91,19 @@ svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp) return true; } +static bool +svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(status)); + if (!p) + return false; + *p = status; + + return true; +} + static __be32 * encode_fh(__be32 *p, struct svc_fh *fhp) { @@ -253,6 +275,58 @@ svcxdr_decode_devicedata3(struct svc_rqst *rqstp, struct xdr_stream *xdr, svcxdr_decode_specdata3(xdr, args); } +static bool +svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp, const struct kstat *stat) +{ + struct user_namespace *userns = nfsd_user_namespace(rqstp); + __be32 *p; + u64 fsid; + + p = xdr_reserve_space(xdr, XDR_UNIT * 21); + if (!p) + return false; + + *p++ = cpu_to_be32(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); + *p++ = cpu_to_be32((u32)(stat->mode & S_IALLUGO)); + *p++ = cpu_to_be32((u32)stat->nlink); + *p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid)); + *p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid)); + if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) + p = xdr_encode_hyper(p, (u64)NFS3_MAXPATHLEN); + else + p = xdr_encode_hyper(p, (u64)stat->size); + + /* used */ + p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); + + /* rdev */ + *p++ = cpu_to_be32((u32)MAJOR(stat->rdev)); + *p++ = cpu_to_be32((u32)MINOR(stat->rdev)); + + switch(fsid_source(fhp)) { + case FSIDSOURCE_FSID: + fsid = (u64)fhp->fh_export->ex_fsid; + break; + case FSIDSOURCE_UUID: + fsid = ((u64 *)fhp->fh_export->ex_uuid)[0]; + fsid ^= ((u64 *)fhp->fh_export->ex_uuid)[1]; + break; + default: + fsid = (u64)huge_encode_dev(fhp->fh_dentry->d_sb->s_dev); + } + p = xdr_encode_hyper(p, fsid); + + /* fileid */ + p = xdr_encode_hyper(p, stat->ino); + + p = encode_nfstime3(p, &stat->atime); + p = encode_nfstime3(p, &stat->mtime); + encode_nfstime3(p, &stat->ctime); + + return true; +} + static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp) { u64 f; @@ -713,17 +787,22 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) /* GETATTR */ int -nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status == 0) { - lease_get_mtime(d_inode(resp->fh.fh_dentry), - &resp->stat.mtime); - p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime); + if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + break; } - return xdr_ressize_check(rqstp, p); + + return 1; } /* SETATTR, REMOVE, RMDIR */ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 10b44421eace..c475d2271f9c 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -711,7 +711,7 @@ char * SVCFH_fmt(struct svc_fh *fhp) return buf; } -enum fsid_source fsid_source(struct svc_fh *fhp) +enum fsid_source fsid_source(const struct svc_fh *fhp) { if (fhp->fh_handle.fh_version != 1) return FSIDSOURCE_DEV; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index f58933519f38..aff2cda5c6c3 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -82,7 +82,7 @@ enum fsid_source { FSIDSOURCE_FSID, FSIDSOURCE_UUID, }; -extern enum fsid_source fsid_source(struct svc_fh *fhp); +extern enum fsid_source fsid_source(const struct svc_fh *fhp); /* diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 3e1578953f54..0822981c61b9 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -280,7 +280,7 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *); -int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *); +int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *); int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *); int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *); From 907c38227fb57f5c537491ca76dd0b9636029393 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 13:56:58 -0400 Subject: [PATCH 03/70] NFSD: Update the NFSv3 ACCESS3res encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 50 ++++++++++++++++++++++++++++++++++++++++++----- fs/nfsd/vfs.h | 2 +- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 75739861d235..9d6c989df6d8 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -383,6 +383,35 @@ encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr); } +static bool +svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp) +{ + struct dentry *dentry = fhp->fh_dentry; + struct kstat stat; + + /* + * The inode may be NULL if the call failed because of a + * stale file handle. In this case, no attributes are + * returned. + */ + if (fhp->fh_no_wcc || !dentry || !d_really_is_positive(dentry)) + goto no_post_op_attrs; + if (fh_getattr(fhp, &stat) != nfs_ok) + goto no_post_op_attrs; + + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + lease_get_mtime(d_inode(dentry), &stat.mtime); + if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &stat)) + return false; + + return true; + +no_post_op_attrs: + return xdr_stream_encode_item_absent(xdr) > 0; +} + /* * Encode post-operation attributes. * The inode may be NULL if the call failed because of a stale file @@ -835,13 +864,24 @@ nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) int nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_accessres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0) - *p++ = htonl(resp->access); - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + if (xdr_stream_encode_u32(xdr, resp->access) < 0) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + } + + return 1; } /* READLINK */ diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index a2442ebe5acf..b21b76e6b9a8 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -152,7 +152,7 @@ static inline void fh_drop_write(struct svc_fh *fh) } } -static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) +static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat) { struct path p = {.mnt = fh->fh_export->ex_path.mnt, .dentry = fh->fh_dentry}; From 5cf353354af1a385f29dec4609a1532d32c83a25 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 14:46:58 -0400 Subject: [PATCH 04/70] NFSD: Update the NFSv3 LOOKUP3res encoder to use struct xdr_stream Also, clean up: Rename the encoder function to match the name of the result structure in RFC 1813, consistent with other encoder function names in nfs3xdr.c. "diropres" is an NFSv2 thingie. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/nfs3xdr.c | 43 +++++++++++++++++++++++++++++++++++-------- fs/nfsd/xdr3.h | 2 +- 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 76a84481d526..49b018afc6ea 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -758,7 +758,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { [NFS3PROC_LOOKUP] = { .pc_func = nfsd3_proc_lookup, .pc_decode = nfs3svc_decode_diropargs, - .pc_encode = nfs3svc_encode_diropres, + .pc_encode = nfs3svc_encode_lookupres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_diropres), diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 9d6c989df6d8..2bb998b3834b 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -104,6 +104,23 @@ svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status) return true; } +static bool +svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) +{ + u32 size = fhp->fh_handle.fh_size; + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT + size); + if (!p) + return false; + *p++ = cpu_to_be32(size); + if (size) + p[XDR_QUADLEN(size) - 1] = 0; + memcpy(p, &fhp->fh_handle.fh_base, size); + + return true; +} + static __be32 * encode_fh(__be32 *p, struct svc_fh *fhp) { @@ -846,18 +863,28 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) } /* LOOKUP */ -int -nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) +int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_diropres *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status == 0) { - p = encode_fh(p, &resp->fh); - p = encode_post_op_attr(rqstp, p, &resp->fh); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) + return 0; } - p = encode_post_op_attr(rqstp, p, &resp->dirfh); - return xdr_ressize_check(rqstp, p); + + return 1; } /* ACCESS */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 0822981c61b9..7db4ee17aa20 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -282,7 +282,7 @@ int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *); int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *); int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); -int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *); int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *); int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *); int nfs3svc_encode_readres(struct svc_rqst *, __be32 *); From 70f8e839859a994e324e1d18889f8319bbd5bff9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 15:12:38 -0400 Subject: [PATCH 05/70] NFSD: Update the NFSv3 wccstat result encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 68 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 2bb998b3834b..29b923a497f4 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -400,6 +400,35 @@ encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr); } +static bool +svcxdr_encode_wcc_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 6); + if (!p) + return false; + p = xdr_encode_hyper(p, (u64)fhp->fh_pre_size); + p = encode_nfstime3(p, &fhp->fh_pre_mtime); + encode_nfstime3(p, &fhp->fh_pre_ctime); + + return true; +} + +static bool +svcxdr_encode_pre_op_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) +{ + if (!fhp->fh_pre_saved) { + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + return true; + } + + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + return svcxdr_encode_wcc_attr(xdr, fhp); +} + static bool svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct svc_fh *fhp) @@ -460,6 +489,39 @@ nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fh return encode_post_op_attr(rqstp, p, fhp); } +/* + * Encode weak cache consistency data + */ +static bool +svcxdr_encode_wcc_data(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp) +{ + struct dentry *dentry = fhp->fh_dentry; + + if (!dentry || !d_really_is_positive(dentry) || !fhp->fh_post_saved) + goto neither; + + /* before */ + if (!svcxdr_encode_pre_op_attr(xdr, fhp)) + return false; + + /* after */ + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &fhp->fh_post_attr)) + return false; + + return true; + +neither: + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, fhp)) + return false; + + return true; +} + /* * Enocde weak cache consistency data */ @@ -855,11 +917,11 @@ nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p) int nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_wcc_data(rqstp, p, &resp->fh); - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh); } /* LOOKUP */ From 9a9c8923b3efd593d0e6a405efef9d58c6e6804b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 15:18:40 -0400 Subject: [PATCH 06/70] NFSD: Update the NFSv3 READLINK3res encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 5 +++-- fs/nfsd/nfs3xdr.c | 34 ++++++++++++++++++---------------- fs/nfsd/xdr3.h | 1 + 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 49b018afc6ea..e55a1d14ede2 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -126,14 +126,15 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_readlinkres *resp = rqstp->rq_resp; - char *buffer = page_address(*(rqstp->rq_next_page++)); dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh)); /* Read the symlink. */ fh_copy(&resp->fh, &argp->fh); resp->len = NFS3_MAXPATHLEN; - resp->status = nfsd_readlink(rqstp, &resp->fh, buffer, &resp->len); + resp->pages = rqstp->rq_next_page++; + resp->status = nfsd_readlink(rqstp, &resp->fh, + page_address(*resp->pages), &resp->len); return rpc_success; } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 29b923a497f4..352691c3e246 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -977,26 +977,28 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) int nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; - *p++ = resp->status; - p = encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0) { - *p++ = htonl(resp->len); - xdr_ressize_check(rqstp, p); - rqstp->rq_res.page_len = resp->len; - if (resp->len & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); - } - if (svc_encode_result_payload(rqstp, head->iov_len, resp->len)) + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) return 0; - return 1; - } else - return xdr_ressize_check(rqstp, p); + if (xdr_stream_encode_u32(xdr, resp->len) < 0) + return 0; + xdr_write_pages(xdr, resp->pages, 0, resp->len); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + } + + return 1; } /* READ */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 7db4ee17aa20..1d633c5d5fa2 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -137,6 +137,7 @@ struct nfsd3_readlinkres { __be32 status; struct svc_fh fh; __u32 len; + struct page **pages; }; struct nfsd3_readres { From cc9bcdad7773c295375e66c892c7ac00524706f2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 15:23:50 -0400 Subject: [PATCH 07/70] NFSD: Update the NFSv3 READ3res encode to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 1 + fs/nfsd/nfs3xdr.c | 43 ++++++++++++++++++++------------------ fs/nfsd/xdr3.h | 1 + include/linux/sunrpc/xdr.h | 20 ++++++++++++++++++ 4 files changed, 45 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index e55a1d14ede2..93d196752f87 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -159,6 +159,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp) v = 0; len = argp->count; + resp->pages = rqstp->rq_next_page; while (len > 0) { struct page *page = *(rqstp->rq_next_page++); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 352691c3e246..859cc6c51c1a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1005,30 +1005,33 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) int nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; - *p++ = resp->status; - p = encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0) { - *p++ = htonl(resp->count); - *p++ = htonl(resp->eof); - *p++ = htonl(resp->count); /* xdr opaque count */ - xdr_ressize_check(rqstp, p); - /* now update rqstp->rq_res to reflect data as well */ - rqstp->rq_res.page_len = resp->count; - if (resp->count & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); - } - if (svc_encode_result_payload(rqstp, head->iov_len, - resp->count)) + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) return 0; - return 1; - } else - return xdr_ressize_check(rqstp, p); + if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return 0; + if (xdr_stream_encode_bool(xdr, resp->eof) < 0) + return 0; + if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return 0; + xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, + resp->count); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + } + + return 1; } /* WRITE */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 1d633c5d5fa2..8073350418ae 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -145,6 +145,7 @@ struct nfsd3_readres { struct svc_fh fh; unsigned long count; __u32 eof; + struct page **pages; }; struct nfsd3_writeres { diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2bc75c167f00..9dda7171b7b4 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -394,6 +394,26 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) return len; } +/** + * xdr_stream_encode_bool - Encode a "not present" list item + * @xdr: pointer to xdr_stream + * @n: boolean value to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n) +{ + const size_t len = XDR_UNIT; + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = n ? xdr_one : xdr_zero; + return len; +} + /** * xdr_stream_encode_u32 - Encode a 32-bit integer * @xdr: pointer to xdr_stream From ecb7a085ac15a8844ebf12fca6ae51ce71ac9b3b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 15:26:31 -0400 Subject: [PATCH 08/70] NFSD: Update the NFSv3 WRITE3res encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 859cc6c51c1a..0191cbfc486b 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -131,6 +131,19 @@ encode_fh(__be32 *p, struct svc_fh *fhp) return p + XDR_QUADLEN(size); } +static bool +svcxdr_encode_writeverf3(struct xdr_stream *xdr, const __be32 *verf) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS3_WRITEVERFSIZE); + if (!p) + return false; + memcpy(p, verf, NFS3_WRITEVERFSIZE); + + return true; +} + static bool svcxdr_decode_filename3(struct xdr_stream *xdr, char **name, unsigned int *len) { @@ -1038,17 +1051,28 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) int nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_writeres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_wcc_data(rqstp, p, &resp->fh); - if (resp->status == 0) { - *p++ = htonl(resp->count); - *p++ = htonl(resp->committed); - *p++ = resp->verf[0]; - *p++ = resp->verf[1]; + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return 0; + if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return 0; + if (xdr_stream_encode_u32(xdr, resp->committed) < 0) + return 0; + if (!svcxdr_encode_writeverf3(xdr, resp->verf)) + return 0; + break; + default: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return 0; } - return xdr_ressize_check(rqstp, p); + + return 1; } /* CREATE, MKDIR, SYMLINK, MKNOD */ From 78315b36781d259dcbdc102ff22c3f2f25712223 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 15:27:23 -0400 Subject: [PATCH 09/70] NFSD: Update the NFSv3 CREATE family of encoders to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 0191cbfc486b..052376a65f72 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -121,6 +121,17 @@ svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) return true; } +static bool +svcxdr_encode_post_op_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) +{ + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + if (!svcxdr_encode_nfs_fh3(xdr, fhp)) + return false; + + return true; +} + static __be32 * encode_fh(__be32 *p, struct svc_fh *fhp) { @@ -1079,16 +1090,26 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) int nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_diropres *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status == 0) { - *p++ = xdr_one; - p = encode_fh(p, &resp->fh); - p = encode_post_op_attr(rqstp, p, &resp->fh); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_fh3(xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) + return 0; + break; + default: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) + return 0; } - p = encode_wcc_data(rqstp, p, &resp->dirfh); - return xdr_ressize_check(rqstp, p); + + return 1; } /* RENAME */ From 89d79e9672dfa6d0cc416699c16f2d312da58ff2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 15:33:05 -0400 Subject: [PATCH 10/70] NFSD: Update the NFSv3 RENAMEv3res encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 052376a65f72..1d52a69562b8 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1116,12 +1116,12 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p) int nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_renameres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_wcc_data(rqstp, p, &resp->ffh); - p = encode_wcc_data(rqstp, p, &resp->tfh); - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->ffh) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh); } /* LINK */ From 4d74380a446f75eebb2171687d9b8baf0025bdf1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 15:08:29 -0400 Subject: [PATCH 11/70] NFSD: Update the NFSv3 LINK3res encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 1d52a69562b8..e159e4557428 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1128,12 +1128,12 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) int nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_linkres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_post_op_attr(rqstp, p, &resp->fh); - p = encode_wcc_data(rqstp, p, &resp->tfh); - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh); } /* READDIR */ From 8b7044984fd6eeadf72285e3617116bd15e9e676 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 6 Nov 2020 13:08:45 -0500 Subject: [PATCH 12/70] NFSD: Update the NFSv3 FSSTAT3res encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 58 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index e159e4557428..e4a569e7216d 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -17,6 +17,13 @@ #define NFSDDBG_FACILITY NFSDDBG_XDR +/* + * Force construction of an empty post-op attr + */ +static const struct svc_fh nfs3svc_null_fh = { + .fh_no_wcc = true, +}; + /* * Mapping of S_IF* types to NFS file types */ @@ -1392,27 +1399,50 @@ nfs3svc_encode_entry_plus(void *cd, const char *name, return encode_entry(cd, name, namlen, offset, ino, d_type, 1); } +static bool +svcxdr_encode_fsstat3resok(struct xdr_stream *xdr, + const struct nfsd3_fsstatres *resp) +{ + const struct kstatfs *s = &resp->stats; + u64 bs = s->f_bsize; + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 13); + if (!p) + return false; + p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */ + p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */ + p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */ + p = xdr_encode_hyper(p, s->f_files); /* total inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */ + *p = cpu_to_be32(resp->invarsec); /* mean unchanged time */ + + return true; +} + /* FSSTAT */ int nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_fsstatres *resp = rqstp->rq_resp; - struct kstatfs *s = &resp->stats; - u64 bs = s->f_bsize; - *p++ = resp->status; - *p++ = xdr_zero; /* no post_op_attr */ - - if (resp->status == 0) { - p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */ - p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */ - p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */ - p = xdr_encode_hyper(p, s->f_files); /* total inodes */ - p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */ - p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */ - *p++ = htonl(resp->invarsec); /* mean unchanged time */ + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; + if (!svcxdr_encode_fsstat3resok(xdr, resp)) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; } - return xdr_ressize_check(rqstp, p); + + return 1; } /* FSINFO */ From 0a139d1b7f327010acc36e8162936d3108c7addb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 13:42:13 -0400 Subject: [PATCH 13/70] NFSD: Update the NFSv3 FSINFO3res encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 62 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index e4a569e7216d..514f53ad7302 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -24,6 +24,15 @@ static const struct svc_fh nfs3svc_null_fh = { .fh_no_wcc = true, }; +/* + * time_delta. {1, 0} means the server is accurate only + * to the nearest second. + */ +static const struct timespec64 nfs3svc_time_delta = { + .tv_sec = 1, + .tv_nsec = 0, +}; + /* * Mapping of S_IF* types to NFS file types */ @@ -1445,30 +1454,51 @@ nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p) return 1; } +static bool +svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr, + const struct nfsd3_fsinfores *resp) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 12); + if (!p) + return false; + *p++ = cpu_to_be32(resp->f_rtmax); + *p++ = cpu_to_be32(resp->f_rtpref); + *p++ = cpu_to_be32(resp->f_rtmult); + *p++ = cpu_to_be32(resp->f_wtmax); + *p++ = cpu_to_be32(resp->f_wtpref); + *p++ = cpu_to_be32(resp->f_wtmult); + *p++ = cpu_to_be32(resp->f_dtpref); + p = xdr_encode_hyper(p, resp->f_maxfilesize); + p = encode_nfstime3(p, &nfs3svc_time_delta); + *p = cpu_to_be32(resp->f_properties); + + return true; +} + /* FSINFO */ int nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_fsinfores *resp = rqstp->rq_resp; - *p++ = resp->status; - *p++ = xdr_zero; /* no post_op_attr */ - - if (resp->status == 0) { - *p++ = htonl(resp->f_rtmax); - *p++ = htonl(resp->f_rtpref); - *p++ = htonl(resp->f_rtmult); - *p++ = htonl(resp->f_wtmax); - *p++ = htonl(resp->f_wtpref); - *p++ = htonl(resp->f_wtmult); - *p++ = htonl(resp->f_dtpref); - p = xdr_encode_hyper(p, resp->f_maxfilesize); - *p++ = xdr_one; - *p++ = xdr_zero; - *p++ = htonl(resp->f_properties); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; + if (!svcxdr_encode_fsinfo3resok(xdr, resp)) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; } - return xdr_ressize_check(rqstp, p); + return 1; } /* PATHCONF */ From ded04a587f6ceaaba3caefad4021f2212b46c9ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 6 Nov 2020 13:15:09 -0500 Subject: [PATCH 14/70] NFSD: Update the NFSv3 PATHCONF3res encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 44 ++++++++++++++++++++++++++++---------- include/linux/sunrpc/xdr.h | 18 ++++++++++++++-- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 514f53ad7302..1467bba02e18 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1501,25 +1501,47 @@ nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p) return 1; } +static bool +svcxdr_encode_pathconf3resok(struct xdr_stream *xdr, + const struct nfsd3_pathconfres *resp) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 6); + if (!p) + return false; + *p++ = cpu_to_be32(resp->p_link_max); + *p++ = cpu_to_be32(resp->p_name_max); + p = xdr_encode_bool(p, resp->p_no_trunc); + p = xdr_encode_bool(p, resp->p_chown_restricted); + p = xdr_encode_bool(p, resp->p_case_insensitive); + xdr_encode_bool(p, resp->p_case_preserving); + + return true; +} + /* PATHCONF */ int nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_pathconfres *resp = rqstp->rq_resp; - *p++ = resp->status; - *p++ = xdr_zero; /* no post_op_attr */ - - if (resp->status == 0) { - *p++ = htonl(resp->p_link_max); - *p++ = htonl(resp->p_name_max); - *p++ = htonl(resp->p_no_trunc); - *p++ = htonl(resp->p_chown_restricted); - *p++ = htonl(resp->p_case_insensitive); - *p++ = htonl(resp->p_case_preserving); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; + if (!svcxdr_encode_pathconf3resok(xdr, resp)) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; } - return xdr_ressize_check(rqstp, p); + return 1; } /* COMMIT */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9dda7171b7b4..a965cbc136ad 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -395,7 +395,21 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) } /** - * xdr_stream_encode_bool - Encode a "not present" list item + * xdr_encode_bool - Encode a boolean item + * @p: address in a buffer into which to encode + * @n: boolean value to encode + * + * Return value: + * Address of item following the encoded boolean + */ +static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) +{ + *p = n ? xdr_one : xdr_zero; + return p++; +} + +/** + * xdr_stream_encode_bool - Encode a boolean item * @xdr: pointer to xdr_stream * @n: boolean value to encode * @@ -410,7 +424,7 @@ static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n) if (unlikely(!p)) return -EMSGSIZE; - *p = n ? xdr_one : xdr_zero; + xdr_encode_bool(p, n); return len; } From 5ef2826c761079e27904c85034df34e601b82d94 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 15:35:46 -0400 Subject: [PATCH 15/70] NFSD: Update the NFSv3 COMMIT3res encoder to use struct xdr_stream As an additional clean up, encode_wcc_data() is removed because it is now no longer used. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 54 +++++++++++++---------------------------------- 1 file changed, 15 insertions(+), 39 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 1467bba02e18..eab14b52db20 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -432,14 +432,6 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, return p; } -static __be32 * -encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) -{ - /* Attributes to follow */ - *p++ = xdr_one; - return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr); -} - static bool svcxdr_encode_wcc_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) { @@ -562,30 +554,6 @@ neither: return true; } -/* - * Enocde weak cache consistency data - */ -static __be32 * -encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) -{ - struct dentry *dentry = fhp->fh_dentry; - - if (dentry && d_really_is_positive(dentry) && fhp->fh_post_saved) { - if (fhp->fh_pre_saved) { - *p++ = xdr_one; - p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size); - p = encode_time3(p, &fhp->fh_pre_mtime); - p = encode_time3(p, &fhp->fh_pre_ctime); - } else { - *p++ = xdr_zero; - } - return encode_saved_post_attr(rqstp, p, fhp); - } - /* no pre- or post-attrs */ - *p++ = xdr_zero; - return encode_post_op_attr(rqstp, p, fhp); -} - static bool fs_supports_change_attribute(struct super_block *sb) { return sb->s_flags & SB_I_VERSION || sb->s_export_op->fetch_iversion; @@ -1548,16 +1516,24 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p) int nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_commitres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_wcc_data(rqstp, p, &resp->fh); - /* Write verifier */ - if (resp->status == 0) { - *p++ = resp->verf[0]; - *p++ = resp->verf[1]; + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_writeverf3(xdr, resp->verf)) + return 0; + break; + default: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return 0; } - return xdr_ressize_check(rqstp, p); + + return 1; } /* From a161e6c76aeba835e475a2f27dbbe5c37e565e94 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 10 Nov 2020 09:57:14 -0500 Subject: [PATCH 16/70] NFSD: Add a helper that encodes NFSv3 directory offset cookies Refactor: De-duplicate identical code that handles encoding of directory offset cookies across page boundaries. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 24 ++---------------------- fs/nfsd/nfs3xdr.c | 36 +++++++++++++++++++++++------------- fs/nfsd/xdr3.h | 2 ++ 3 files changed, 27 insertions(+), 35 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 93d196752f87..90566cd01bdc 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -495,17 +495,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) count += PAGE_SIZE; } resp->count = count >> 2; - if (resp->offset) { - if (unlikely(resp->offset1)) { - /* we ended up with offset on a page boundary */ - *resp->offset = htonl(offset >> 32); - *resp->offset1 = htonl(offset & 0xffffffff); - resp->offset1 = NULL; - } else { - xdr_encode_hyper(resp->offset, offset); - } - resp->offset = NULL; - } + nfs3svc_encode_cookie3(resp, offset); return rpc_success; } @@ -560,17 +550,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) count += PAGE_SIZE; } resp->count = count >> 2; - if (resp->offset) { - if (unlikely(resp->offset1)) { - /* we ended up with offset on a page boundary */ - *resp->offset = htonl(offset >> 32); - *resp->offset1 = htonl(offset & 0xffffffff); - resp->offset1 = NULL; - } else { - xdr_encode_hyper(resp->offset, offset); - } - resp->offset = NULL; - } + nfs3svc_encode_cookie3(resp, offset); out: return rpc_success; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index eab14b52db20..e334a1454edb 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1219,6 +1219,28 @@ out: return p; } +/** + * nfs3svc_encode_cookie3 - Encode a directory offset cookie + * @resp: readdir result context + * @offset: offset cookie to encode + * + */ +void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset) +{ + if (!resp->offset) + return; + + if (resp->offset1) { + /* we ended up with offset on a page boundary */ + *resp->offset = cpu_to_be32(offset >> 32); + *resp->offset1 = cpu_to_be32(offset & 0xffffffff); + resp->offset1 = NULL; + } else { + xdr_encode_hyper(resp->offset, offset); + } + resp->offset = NULL; +} + /* * Encode a directory entry. This one works for both normal readdir * and readdirplus. @@ -1244,19 +1266,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, int elen; /* estimated entry length in words */ int num_entry_words = 0; /* actual number of words */ - if (cd->offset) { - u64 offset64 = offset; - - if (unlikely(cd->offset1)) { - /* we ended up with offset on a page boundary */ - *cd->offset = htonl(offset64 >> 32); - *cd->offset1 = htonl(offset64 & 0xffffffff); - cd->offset1 = NULL; - } else { - xdr_encode_hyper(cd->offset, offset64); - } - cd->offset = NULL; - } + nfs3svc_encode_cookie3(cd, offset); /* dprintk("encode_entry(%.*s @%ld%s)\n", diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 8073350418ae..e76e9230827e 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -300,6 +300,8 @@ int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *); void nfs3svc_release_fhandle(struct svc_rqst *); void nfs3svc_release_fhandle2(struct svc_rqst *); + +void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset); int nfs3svc_encode_entry(void *, const char *name, int namlen, loff_t offset, u64 ino, unsigned int); From a1409e2de4f11034c8eb30775cc3e37039a4ef13 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 Nov 2020 13:13:21 -0500 Subject: [PATCH 17/70] NFSD: Count bytes instead of pages in the NFSv3 READDIR encoder Clean up: Counting the bytes used by each returned directory entry seems less brittle to me than trying to measure consumed pages after the fact. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 31 ++----------------------------- fs/nfsd/nfs3xdr.c | 1 + 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 90566cd01bdc..791d77363acd 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -462,10 +462,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) { struct nfsd3_readdirargs *argp = rqstp->rq_argp; struct nfsd3_readdirres *resp = rqstp->rq_resp; - int count = 0; loff_t offset; - struct page **p; - caddr_t page_addr = NULL; dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -476,6 +473,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) /* Read directory and encode entries on the fly */ fh_copy(&resp->fh, &argp->fh); + resp->count = 0; resp->common.err = nfs_ok; resp->rqstp = rqstp; offset = argp->cookie; @@ -483,18 +481,6 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) resp->status = nfsd_readdir(rqstp, &resp->fh, &offset, &resp->common, nfs3svc_encode_entry); memcpy(resp->verf, argp->verf, 8); - count = 0; - for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { - page_addr = page_address(*p); - - if (((caddr_t)resp->buffer >= page_addr) && - ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { - count += (caddr_t)resp->buffer - page_addr; - break; - } - count += PAGE_SIZE; - } - resp->count = count >> 2; nfs3svc_encode_cookie3(resp, offset); return rpc_success; @@ -509,10 +495,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) { struct nfsd3_readdirargs *argp = rqstp->rq_argp; struct nfsd3_readdirres *resp = rqstp->rq_resp; - int count = 0; loff_t offset; - struct page **p; - caddr_t page_addr = NULL; dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -523,6 +506,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) /* Read directory and encode entries on the fly */ fh_copy(&resp->fh, &argp->fh); + resp->count = 0; resp->common.err = nfs_ok; resp->rqstp = rqstp; offset = argp->cookie; @@ -539,17 +523,6 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) resp->status = nfsd_readdir(rqstp, &resp->fh, &offset, &resp->common, nfs3svc_encode_entry_plus); memcpy(resp->verf, argp->verf, 8); - for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { - page_addr = page_address(*p); - - if (((caddr_t)resp->buffer >= page_addr) && - ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { - count += (caddr_t)resp->buffer - page_addr; - break; - } - count += PAGE_SIZE; - } - resp->count = count >> 2; nfs3svc_encode_cookie3(resp, offset); out: diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index e334a1454edb..523b2dca0494 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1364,6 +1364,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, return -EINVAL; } + cd->count += num_entry_words; cd->buflen -= num_entry_words; cd->buffer = p; cd->common.err = nfs_ok; From e4ccfe3014de435984939a3d84b7f241d3b57b0d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 19:31:48 -0400 Subject: [PATCH 18/70] NFSD: Update the NFSv3 READDIR3res encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 3 ++- fs/nfsd/nfs3xdr.c | 52 ++++++++++++++++++++++++++++++---------------- fs/nfsd/xdr3.h | 1 + 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 791d77363acd..f1096aa0f47c 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -447,7 +447,8 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, * and reserve room for the NULL ptr & eof flag (-2 words) */ resp->buflen = (count >> 2) - 2; - resp->buffer = page_address(*rqstp->rq_next_page); + resp->pages = rqstp->rq_next_page; + resp->buffer = page_address(*resp->pages); while (count > 0) { rqstp->rq_next_page++; count -= PAGE_SIZE; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 523b2dca0494..3d076d3c5c7b 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -158,6 +158,19 @@ encode_fh(__be32 *p, struct svc_fh *fhp) return p + XDR_QUADLEN(size); } +static bool +svcxdr_encode_cookieverf3(struct xdr_stream *xdr, const __be32 *verf) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS3_COOKIEVERFSIZE); + if (!p) + return false; + memcpy(p, verf, NFS3_COOKIEVERFSIZE); + + return true; +} + static bool svcxdr_encode_writeverf3(struct xdr_stream *xdr, const __be32 *verf) { @@ -1124,27 +1137,30 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) int nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readdirres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_post_op_attr(rqstp, p, &resp->fh); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_cookieverf3(xdr, resp->verf)) + return 0; + xdr_write_pages(xdr, resp->pages, 0, resp->count << 2); + /* no more entries */ + if (xdr_stream_encode_item_absent(xdr) < 0) + return 0; + if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + } - if (resp->status == 0) { - /* stupid readdir cookie */ - memcpy(p, resp->verf, 8); p += 2; - xdr_ressize_check(rqstp, p); - if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE) - return 1; /*No room for trailer */ - rqstp->rq_res.page_len = (resp->count) << 2; - - /* add the 'tail' to the end of the 'head' page - page 0. */ - rqstp->rq_res.tail[0].iov_base = p; - *p++ = 0; /* no more entries */ - *p++ = htonl(resp->common.err == nfserr_eof); - rqstp->rq_res.tail[0].iov_len = 2<<2; - return 1; - } else - return xdr_ressize_check(rqstp, p); + return 1; } static __be32 * diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index e76e9230827e..a4cdd8ccb175 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -176,6 +176,7 @@ struct nfsd3_readdirres { struct svc_fh scratch; int count; __be32 verf[2]; + struct page **pages; struct readdir_cd common; __be32 * buffer; From 7f87fc2d34d475225e78b7f5c4eabb121f4282b2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 19:46:58 -0400 Subject: [PATCH 19/70] NFSD: Update NFSv3 READDIR entry encoders to use struct xdr_stream The benefit of the xdr_stream helpers is that they transparently handle encoding an XDR data item that crosses page boundaries. Most of the open-coded logic to do that here can be eliminated. A sub-buffer and sub-stream are set up as a sink buffer for the directory entry encoder. As an entry is encoded, it is added to the end of the content in this buffer/stream. The total length of the directory list is tracked in the buffer's @len field. When it comes time to encode the Reply, the sub-buffer is merged into rq_res's page array at the correct place using xdr_write_pages(). Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 35 ++++++---- fs/nfsd/nfs3xdr.c | 166 +++++++++++++++++++++++++++++++++++++++++---- fs/nfsd/xdr3.h | 14 ++-- 3 files changed, 185 insertions(+), 30 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index f1096aa0f47c..bc64e95a168d 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -441,18 +441,30 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, struct nfsd3_readdirres *resp, int count) { + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + count = min_t(u32, count, svc_max_payload(rqstp)); - /* Convert byte count to number of words (i.e. >> 2), - * and reserve room for the NULL ptr & eof flag (-2 words) */ - resp->buflen = (count >> 2) - 2; + memset(buf, 0, sizeof(*buf)); - resp->pages = rqstp->rq_next_page; - resp->buffer = page_address(*resp->pages); + /* Reserve room for the NULL ptr & eof flag (-2 words) */ + buf->buflen = count - XDR_UNIT * 2; + buf->pages = rqstp->rq_next_page; while (count > 0) { rqstp->rq_next_page++; count -= PAGE_SIZE; } + + /* This is xdr_init_encode(), but it assumes that + * the head kvec has already been consumed. */ + xdr_set_scratch_buffer(xdr, NULL, 0); + xdr->buf = buf; + xdr->page_ptr = buf->pages; + xdr->iov = NULL; + xdr->p = page_address(*buf->pages); + xdr->end = xdr->p + (PAGE_SIZE >> 2); + xdr->rqst = NULL; } /* @@ -471,16 +483,13 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) nfsd3_init_dirlist_pages(rqstp, resp, argp->count); - /* Read directory and encode entries on the fly */ fh_copy(&resp->fh, &argp->fh); - - resp->count = 0; resp->common.err = nfs_ok; + resp->cookie_offset = 0; resp->rqstp = rqstp; offset = argp->cookie; - resp->status = nfsd_readdir(rqstp, &resp->fh, &offset, - &resp->common, nfs3svc_encode_entry); + &resp->common, nfs3svc_encode_entry3); memcpy(resp->verf, argp->verf, 8); nfs3svc_encode_cookie3(resp, offset); @@ -504,11 +513,9 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) nfsd3_init_dirlist_pages(rqstp, resp, argp->count); - /* Read directory and encode entries on the fly */ fh_copy(&resp->fh, &argp->fh); - - resp->count = 0; resp->common.err = nfs_ok; + resp->cookie_offset = 0; resp->rqstp = rqstp; offset = argp->cookie; @@ -522,7 +529,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) } resp->status = nfsd_readdir(rqstp, &resp->fh, &offset, - &resp->common, nfs3svc_encode_entry_plus); + &resp->common, nfs3svc_encode_entryplus3); memcpy(resp->verf, argp->verf, 8); nfs3svc_encode_cookie3(resp, offset); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 3d076d3c5c7b..f38d845ac8a0 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1139,6 +1139,7 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) { struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readdirres *resp = rqstp->rq_resp; + struct xdr_buf *dirlist = &resp->dirlist; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) return 0; @@ -1148,7 +1149,7 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) return 0; if (!svcxdr_encode_cookieverf3(xdr, resp->verf)) return 0; - xdr_write_pages(xdr, resp->pages, 0, resp->count << 2); + xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) return 0; @@ -1240,21 +1241,18 @@ out: * @resp: readdir result context * @offset: offset cookie to encode * + * The buffer space for the offset cookie has already been reserved + * by svcxdr_encode_entry3_common(). */ void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset) { - if (!resp->offset) - return; + __be64 cookie = cpu_to_be64(offset); - if (resp->offset1) { - /* we ended up with offset on a page boundary */ - *resp->offset = cpu_to_be32(offset >> 32); - *resp->offset1 = cpu_to_be32(offset & 0xffffffff); - resp->offset1 = NULL; - } else { - xdr_encode_hyper(resp->offset, offset); - } - resp->offset = NULL; + if (!resp->cookie_offset) + return; + write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie, + sizeof(cookie)); + resp->cookie_offset = 0; } /* @@ -1403,6 +1401,150 @@ nfs3svc_encode_entry_plus(void *cd, const char *name, return encode_entry(cd, name, namlen, offset, ino, d_type, 1); } +static bool +svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name, + int namlen, loff_t offset, u64 ino) +{ + struct xdr_buf *dirlist = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + /* fileid */ + if (xdr_stream_encode_u64(xdr, ino) < 0) + return false; + /* name */ + if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS3_MAXNAMLEN)) < 0) + return false; + /* cookie */ + resp->cookie_offset = dirlist->len; + if (xdr_stream_encode_u64(xdr, NFS_OFFSET_MAX) < 0) + return false; + + return true; +} + +/** + * nfs3svc_encode_entry3 - encode one NFSv3 READDIR entry + * @data: directory context + * @name: name of the object to be encoded + * @namlen: length of that name, in bytes + * @offset: the offset of the previous entry + * @ino: the fileid of this entry + * @d_type: unused + * + * Return values: + * %0: Entry was successfully encoded. + * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err + * + * On exit, the following fields are updated: + * - resp->xdr + * - resp->common.err + * - resp->cookie_offset + */ +int nfs3svc_encode_entry3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = data; + struct nfsd3_readdirres *resp = container_of(ccd, + struct nfsd3_readdirres, + common); + unsigned int starting_length = resp->dirlist.len; + + /* The offset cookie for the previous entry */ + nfs3svc_encode_cookie3(resp, offset); + + if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino)) + goto out_toosmall; + + xdr_commit_encode(&resp->xdr); + resp->common.err = nfs_ok; + return 0; + +out_toosmall: + resp->cookie_offset = 0; + resp->common.err = nfserr_toosmall; + resp->dirlist.len = starting_length; + return -EINVAL; +} + +static bool +svcxdr_encode_entry3_plus(struct nfsd3_readdirres *resp, const char *name, + int namlen, u64 ino) +{ + struct xdr_stream *xdr = &resp->xdr; + struct svc_fh *fhp = &resp->scratch; + bool result; + + result = false; + fh_init(fhp, NFS3_FHSIZE); + if (compose_entry_fh(resp, fhp, name, namlen, ino) != nfs_ok) + goto out_noattrs; + + if (!svcxdr_encode_post_op_attr(resp->rqstp, xdr, fhp)) + goto out; + if (!svcxdr_encode_post_op_fh3(xdr, fhp)) + goto out; + result = true; + +out: + fh_put(fhp); + return result; + +out_noattrs: + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + return true; +} + +/** + * nfs3svc_encode_entryplus3 - encode one NFSv3 READDIRPLUS entry + * @data: directory context + * @name: name of the object to be encoded + * @namlen: length of that name, in bytes + * @offset: the offset of the previous entry + * @ino: the fileid of this entry + * @d_type: unused + * + * Return values: + * %0: Entry was successfully encoded. + * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err + * + * On exit, the following fields are updated: + * - resp->xdr + * - resp->common.err + * - resp->cookie_offset + */ +int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = data; + struct nfsd3_readdirres *resp = container_of(ccd, + struct nfsd3_readdirres, + common); + unsigned int starting_length = resp->dirlist.len; + + /* The offset cookie for the previous entry */ + nfs3svc_encode_cookie3(resp, offset); + + if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino)) + goto out_toosmall; + if (!svcxdr_encode_entry3_plus(resp, name, namlen, ino)) + goto out_toosmall; + + xdr_commit_encode(&resp->xdr); + resp->common.err = nfs_ok; + return 0; + +out_toosmall: + resp->cookie_offset = 0; + resp->common.err = nfserr_toosmall; + resp->dirlist.len = starting_length; + return -EINVAL; +} + static bool svcxdr_encode_fsstat3resok(struct xdr_stream *xdr, const struct nfsd3_fsstatres *resp) diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index a4cdd8ccb175..81dea78b0f17 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -169,20 +169,22 @@ struct nfsd3_linkres { }; struct nfsd3_readdirres { + /* Components of the reply */ __be32 status; struct svc_fh fh; - /* Just to save kmalloc on every readdirplus entry (svc_fh is a - * little large for the stack): */ - struct svc_fh scratch; int count; __be32 verf[2]; - struct page **pages; + /* Used to encode the reply's entry list */ + struct xdr_stream xdr; + struct xdr_buf dirlist; + struct svc_fh scratch; struct readdir_cd common; __be32 * buffer; int buflen; __be32 * offset; __be32 * offset1; + unsigned int cookie_offset; struct svc_rqst * rqstp; }; @@ -309,6 +311,10 @@ int nfs3svc_encode_entry(void *, const char *name, int nfs3svc_encode_entry_plus(void *, const char *name, int namlen, loff_t offset, u64 ino, unsigned int); +int nfs3svc_encode_entry3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type); +int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type); /* Helper functions for NFSv3 ACL code */ __be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp); From 1411934627f9fe31a36ac8c43179ce9b63edce5c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Nov 2020 11:27:13 -0500 Subject: [PATCH 20/70] NFSD: Remove unused NFSv3 directory entry encoders Clean up. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 190 ---------------------------------------------- fs/nfsd/xdr3.h | 11 --- 2 files changed, 201 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f38d845ac8a0..646bbfc5b779 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -148,16 +148,6 @@ svcxdr_encode_post_op_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) return true; } -static __be32 * -encode_fh(__be32 *p, struct svc_fh *fhp) -{ - unsigned int size = fhp->fh_handle.fh_size; - *p++ = htonl(size); - if (size) p[XDR_QUADLEN(size)-1]=0; - memcpy(p, &fhp->fh_handle.fh_base, size); - return p + XDR_QUADLEN(size); -} - static bool svcxdr_encode_cookieverf3(struct xdr_stream *xdr, const __be32 *verf) { @@ -1164,20 +1154,6 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) return 1; } -static __be32 * -encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, - int namlen, u64 ino) -{ - *p++ = xdr_one; /* mark entry present */ - p = xdr_encode_hyper(p, ino); /* file id */ - p = xdr_encode_array(p, name, namlen);/* name length & name */ - - cd->offset = p; /* remember pointer */ - p = xdr_encode_hyper(p, NFS_OFFSET_MAX);/* offset of next entry */ - - return p; -} - static __be32 compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, const char *name, int namlen, u64 ino) @@ -1216,26 +1192,6 @@ out: return rv; } -static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino) -{ - struct svc_fh *fh = &cd->scratch; - __be32 err; - - fh_init(fh, NFS3_FHSIZE); - err = compose_entry_fh(cd, fh, name, namlen, ino); - if (err) { - *p++ = 0; - *p++ = 0; - goto out; - } - p = encode_post_op_attr(cd->rqstp, p, fh); - *p++ = xdr_one; /* yes, a file handle follows */ - p = encode_fh(p, fh); -out: - fh_put(fh); - return p; -} - /** * nfs3svc_encode_cookie3 - Encode a directory offset cookie * @resp: readdir result context @@ -1255,152 +1211,6 @@ void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset) resp->cookie_offset = 0; } -/* - * Encode a directory entry. This one works for both normal readdir - * and readdirplus. - * The normal readdir reply requires 2 (fileid) + 1 (stringlen) - * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen. - * - * The readdirplus baggage is 1+21 words for post_op_attr, plus the - * file handle. - */ - -#define NFS3_ENTRY_BAGGAGE (2 + 1 + 2 + 1) -#define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2)) -static int -encode_entry(struct readdir_cd *ccd, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type, int plus) -{ - struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres, - common); - __be32 *p = cd->buffer; - caddr_t curr_page_addr = NULL; - struct page ** page; - int slen; /* string (name) length */ - int elen; /* estimated entry length in words */ - int num_entry_words = 0; /* actual number of words */ - - nfs3svc_encode_cookie3(cd, offset); - - /* - dprintk("encode_entry(%.*s @%ld%s)\n", - namlen, name, (long) offset, plus? " plus" : ""); - */ - - /* truncate filename if too long */ - namlen = min(namlen, NFS3_MAXNAMLEN); - - slen = XDR_QUADLEN(namlen); - elen = slen + NFS3_ENTRY_BAGGAGE - + (plus? NFS3_ENTRYPLUS_BAGGAGE : 0); - - if (cd->buflen < elen) { - cd->common.err = nfserr_toosmall; - return -EINVAL; - } - - /* determine which page in rq_respages[] we are currently filling */ - for (page = cd->rqstp->rq_respages + 1; - page < cd->rqstp->rq_next_page; page++) { - curr_page_addr = page_address(*page); - - if (((caddr_t)cd->buffer >= curr_page_addr) && - ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE)) - break; - } - - if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) { - /* encode entry in current page */ - - p = encode_entry_baggage(cd, p, name, namlen, ino); - - if (plus) - p = encode_entryplus_baggage(cd, p, name, namlen, ino); - num_entry_words = p - cd->buffer; - } else if (*(page+1) != NULL) { - /* temporarily encode entry into next page, then move back to - * current and next page in rq_respages[] */ - __be32 *p1, *tmp; - int len1, len2; - - /* grab next page for temporary storage of entry */ - p1 = tmp = page_address(*(page+1)); - - p1 = encode_entry_baggage(cd, p1, name, namlen, ino); - - if (plus) - p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino); - - /* determine entry word length and lengths to go in pages */ - num_entry_words = p1 - tmp; - len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer; - if ((num_entry_words << 2) < len1) { - /* the actual number of words in the entry is less - * than elen and can still fit in the current page - */ - memmove(p, tmp, num_entry_words << 2); - p += num_entry_words; - - /* update offset */ - cd->offset = cd->buffer + (cd->offset - tmp); - } else { - unsigned int offset_r = (cd->offset - tmp) << 2; - - /* update pointer to offset location. - * This is a 64bit quantity, so we need to - * deal with 3 cases: - * - entirely in first page - * - entirely in second page - * - 4 bytes in each page - */ - if (offset_r + 8 <= len1) { - cd->offset = p + (cd->offset - tmp); - } else if (offset_r >= len1) { - cd->offset -= len1 >> 2; - } else { - /* sitting on the fence */ - BUG_ON(offset_r != len1 - 4); - cd->offset = p + (cd->offset - tmp); - cd->offset1 = tmp; - } - - len2 = (num_entry_words << 2) - len1; - - /* move from temp page to current and next pages */ - memmove(p, tmp, len1); - memmove(tmp, (caddr_t)tmp+len1, len2); - - p = tmp + (len2 >> 2); - } - } - else { - cd->common.err = nfserr_toosmall; - return -EINVAL; - } - - cd->count += num_entry_words; - cd->buflen -= num_entry_words; - cd->buffer = p; - cd->common.err = nfs_ok; - return 0; - -} - -int -nfs3svc_encode_entry(void *cd, const char *name, - int namlen, loff_t offset, u64 ino, unsigned int d_type) -{ - return encode_entry(cd, name, namlen, offset, ino, d_type, 0); -} - -int -nfs3svc_encode_entry_plus(void *cd, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int d_type) -{ - return encode_entry(cd, name, namlen, offset, ino, d_type, 1); -} - static bool svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name, int namlen, loff_t offset, u64 ino) diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 81dea78b0f17..b851458373db 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -172,7 +172,6 @@ struct nfsd3_readdirres { /* Components of the reply */ __be32 status; struct svc_fh fh; - int count; __be32 verf[2]; /* Used to encode the reply's entry list */ @@ -180,10 +179,6 @@ struct nfsd3_readdirres { struct xdr_buf dirlist; struct svc_fh scratch; struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; - __be32 * offset1; unsigned int cookie_offset; struct svc_rqst * rqstp; @@ -305,12 +300,6 @@ void nfs3svc_release_fhandle(struct svc_rqst *); void nfs3svc_release_fhandle2(struct svc_rqst *); void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset); -int nfs3svc_encode_entry(void *, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int); -int nfs3svc_encode_entry_plus(void *, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int); int nfs3svc_encode_entry3(void *data, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type); int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, From 76ed0dd96eeb2771b21bf5dcbd88326ef89ee0ed Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 15 Jan 2021 09:28:44 -0500 Subject: [PATCH 21/70] NFSD: Reduce svc_rqst::rq_pages churn during READDIR operations During NFSv2 and NFSv3 READDIR/PLUS operations, NFSD advances rq_next_page to the full size of the client-requested buffer, then releases all those pages at the end of the request. The next request to use that nfsd thread has to refill the pages. NFSD does this even when the dirlist in the reply is small. With NFSv3 clients that send READDIR operations with large buffer sizes, that can be 256 put_page/alloc_page pairs per READDIR request, even though those pages often remain unused. We can save some work by not releasing dirlist buffer pages that were not used to form the READDIR Reply. I've left the NFSv2 code alone since there are never more than three pages involved in an NFSv2 READDIR Reply. Eventually we should nail down why these pages need to be released at all in order to avoid allocating and releasing pages unnecessarily. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index bc64e95a168d..17715a6c7a40 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -493,6 +493,9 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) memcpy(resp->verf, argp->verf, 8); nfs3svc_encode_cookie3(resp, offset); + /* Recycle only pages that were part of the reply */ + rqstp->rq_next_page = resp->xdr.page_ptr + 1; + return rpc_success; } @@ -533,6 +536,9 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) memcpy(resp->verf, argp->verf, 8); nfs3svc_encode_cookie3(resp, offset); + /* Recycle only pages that were part of the reply */ + rqstp->rq_next_page = resp->xdr.page_ptr + 1; + out: return rpc_success; } From a887eaed2a964754334cd3f8c5fe87e413e68fef Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 11:08:02 -0400 Subject: [PATCH 22/70] NFSD: Update the NFSv2 stat encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfsproc.c | 10 +++++----- fs/nfsd/nfsxdr.c | 19 ++++++++++++++++--- fs/nfsd/xdr.h | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a8d5449dd0e9..59080e6793b9 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -736,7 +736,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_REMOVE] = { .pc_func = nfsd_proc_remove, .pc_decode = nfssvc_decode_diropargs, - .pc_encode = nfssvc_encode_stat, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, @@ -746,7 +746,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_RENAME] = { .pc_func = nfsd_proc_rename, .pc_decode = nfssvc_decode_renameargs, - .pc_encode = nfssvc_encode_stat, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_renameargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, @@ -756,7 +756,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_LINK] = { .pc_func = nfsd_proc_link, .pc_decode = nfssvc_decode_linkargs, - .pc_encode = nfssvc_encode_stat, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_linkargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, @@ -766,7 +766,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_SYMLINK] = { .pc_func = nfsd_proc_symlink, .pc_decode = nfssvc_decode_symlinkargs, - .pc_encode = nfssvc_encode_stat, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_symlinkargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, @@ -787,7 +787,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_RMDIR] = { .pc_func = nfsd_proc_rmdir, .pc_decode = nfssvc_decode_diropargs, - .pc_encode = nfssvc_encode_stat, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 5d79ef6a0c7f..10cd120044b3 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -26,6 +26,19 @@ static u32 nfs_ftypes[] = { * Basic NFSv2 data types (RFC 1094 Section 2.3) */ +static bool +svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(status)); + if (!p) + return false; + *p = status; + + return true; +} + /** * svcxdr_decode_fhandle - Decode an NFSv2 file handle * @xdr: XDR stream positioned at an encoded NFSv2 FH @@ -390,12 +403,12 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) */ int -nfssvc_encode_stat(struct svc_rqst *rqstp, __be32 *p) +nfssvc_encode_statres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_stat *resp = rqstp->rq_resp; - *p++ = resp->status; - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_stat(xdr, resp->status); } int diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 3018b52b6d5e..a74ffcf8b9c6 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -147,7 +147,7 @@ int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *); int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *); int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *); int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *); -int nfssvc_encode_stat(struct svc_rqst *, __be32 *); +int nfssvc_encode_statres(struct svc_rqst *, __be32 *); int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *); int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *); From 92b54a4fa4224e6116eb0d87a39dd05af23fcdfa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 15:28:59 -0400 Subject: [PATCH 23/70] NFSD: Update the NFSv2 attrstat encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfsproc.c | 6 ++-- fs/nfsd/nfsxdr.c | 90 ++++++++++++++++++++++++++++++++++++++++++----- fs/nfsd/xdr.h | 2 +- 3 files changed, 86 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 59080e6793b9..2ae6409ee39e 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -640,7 +640,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_GETATTR] = { .pc_func = nfsd_proc_getattr, .pc_decode = nfssvc_decode_fhandleargs, - .pc_encode = nfssvc_encode_attrstat, + .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), @@ -651,7 +651,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_SETATTR] = { .pc_func = nfsd_proc_setattr, .pc_decode = nfssvc_decode_sattrargs, - .pc_encode = nfssvc_encode_attrstat, + .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_sattrargs), .pc_ressize = sizeof(struct nfsd_attrstat), @@ -714,7 +714,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_WRITE] = { .pc_func = nfsd_proc_write, .pc_decode = nfssvc_decode_writeargs, - .pc_encode = nfssvc_encode_attrstat, + .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_writeargs), .pc_ressize = sizeof(struct nfsd_attrstat), diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 10cd120044b3..65c8f8f31444 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -14,7 +14,7 @@ /* * Mapping of S_IF* types to NFS file types */ -static u32 nfs_ftypes[] = { +static const u32 nfs_ftypes[] = { NFNON, NFCHR, NFCHR, NFBAD, NFDIR, NFBAD, NFBLK, NFBAD, NFREG, NFBAD, NFLNK, NFBAD, @@ -70,6 +70,17 @@ encode_fh(__be32 *p, struct svc_fh *fhp) return p + (NFS_FHSIZE>> 2); } +static __be32 * +encode_timeval(__be32 *p, const struct timespec64 *time) +{ + *p++ = cpu_to_be32((u32)time->tv_sec); + if (time->tv_nsec) + *p++ = cpu_to_be32(time->tv_nsec / NSEC_PER_USEC); + else + *p++ = xdr_zero; + return p; +} + static bool svcxdr_decode_filename(struct xdr_stream *xdr, char **name, unsigned int *len) { @@ -233,6 +244,64 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, return p; } +static int +svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp, const struct kstat *stat) +{ + struct user_namespace *userns = nfsd_user_namespace(rqstp); + struct dentry *dentry = fhp->fh_dentry; + int type = stat->mode & S_IFMT; + struct timespec64 time; + __be32 *p; + u32 fsid; + + p = xdr_reserve_space(xdr, XDR_UNIT * 17); + if (!p) + return 0; + + *p++ = cpu_to_be32(nfs_ftypes[type >> 12]); + *p++ = cpu_to_be32((u32)stat->mode); + *p++ = cpu_to_be32((u32)stat->nlink); + *p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid)); + *p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid)); + + if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) + *p++ = cpu_to_be32(NFS_MAXPATHLEN); + else + *p++ = cpu_to_be32((u32) stat->size); + *p++ = cpu_to_be32((u32) stat->blksize); + if (S_ISCHR(type) || S_ISBLK(type)) + *p++ = cpu_to_be32(new_encode_dev(stat->rdev)); + else + *p++ = cpu_to_be32(0xffffffff); + *p++ = cpu_to_be32((u32)stat->blocks); + + switch (fsid_source(fhp)) { + case FSIDSOURCE_FSID: + fsid = (u32)fhp->fh_export->ex_fsid; + break; + case FSIDSOURCE_UUID: + fsid = ((u32 *)fhp->fh_export->ex_uuid)[0]; + fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[1]; + fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[2]; + fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[3]; + break; + default: + fsid = new_encode_dev(stat->dev); + break; + } + *p++ = cpu_to_be32(fsid); + + *p++ = cpu_to_be32((u32)stat->ino); + p = encode_timeval(p, &stat->atime); + time = stat->mtime; + lease_get_mtime(d_inode(dentry), &time); + p = encode_timeval(p, &time); + encode_timeval(p, &stat->ctime); + + return 1; +} + /* Helper function for NFSv2 ACL code */ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { @@ -412,16 +481,21 @@ nfssvc_encode_statres(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p) +nfssvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_attrstat *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status != nfs_ok) - goto out; - p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); -out: - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + break; + } + + return 1; } int diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index a74ffcf8b9c6..a9b4ee6d098b 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -148,7 +148,7 @@ int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *); int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *); int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *); int nfssvc_encode_statres(struct svc_rqst *, __be32 *); -int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *); +int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *); int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *); int nfssvc_encode_readres(struct svc_rqst *, __be32 *); From e3b4ef221ac57c08341c97a10c8a81c041f76716 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 16:44:16 -0400 Subject: [PATCH 24/70] NFSD: Update the NFSv2 diropres encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfsxdr.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 65c8f8f31444..989144b0d5be 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -63,11 +63,17 @@ svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp) return true; } -static __be32 * -encode_fh(__be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_fhandle(struct xdr_stream *xdr, const struct svc_fh *fhp) { + __be32 *p; + + p = xdr_reserve_space(xdr, NFS_FHSIZE); + if (!p) + return false; memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE); - return p + (NFS_FHSIZE>> 2); + + return true; } static __be32 * @@ -244,7 +250,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, return p; } -static int +static bool svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct svc_fh *fhp, const struct kstat *stat) { @@ -257,7 +263,7 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, p = xdr_reserve_space(xdr, XDR_UNIT * 17); if (!p) - return 0; + return false; *p++ = cpu_to_be32(nfs_ftypes[type >> 12]); *p++ = cpu_to_be32((u32)stat->mode); @@ -299,7 +305,7 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, p = encode_timeval(p, &time); encode_timeval(p, &stat->ctime); - return 1; + return true; } /* Helper function for NFSv2 ACL code */ @@ -501,15 +507,21 @@ nfssvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) int nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_diropres *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status != nfs_ok) - goto out; - p = encode_fh(p, &resp->fh); - p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); -out: - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fhandle(xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + break; + } + + return 1; } int From d9014b0f8fae11f22a3d356553844e06ddcdce4a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 15:41:09 -0400 Subject: [PATCH 25/70] NFSD: Update the NFSv2 READLINK result encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfsproc.c | 5 +++-- fs/nfsd/nfsxdr.c | 26 ++++++++++++-------------- fs/nfsd/xdr.h | 1 + 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 2ae6409ee39e..f55080482997 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -151,13 +151,14 @@ nfsd_proc_readlink(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_readlinkres *resp = rqstp->rq_resp; - char *buffer = page_address(*(rqstp->rq_next_page++)); dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh)); /* Read the symlink. */ resp->len = NFS_MAXPATHLEN; - resp->status = nfsd_readlink(rqstp, &argp->fh, buffer, &resp->len); + resp->page = *(rqstp->rq_next_page++); + resp->status = nfsd_readlink(rqstp, &argp->fh, + page_address(resp->page), &resp->len); fh_put(&argp->fh); return rpc_success; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 989144b0d5be..74d9d11949c6 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -527,24 +527,22 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) int nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; - *p++ = resp->status; - if (resp->status != nfs_ok) - return xdr_ressize_check(rqstp, p); - - *p++ = htonl(resp->len); - xdr_ressize_check(rqstp, p); - rqstp->rq_res.page_len = resp->len; - if (resp->len & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); - } - if (svc_encode_result_payload(rqstp, head->iov_len, resp->len)) + if (!svcxdr_encode_stat(xdr, resp->status)) return 0; + switch (resp->status) { + case nfs_ok: + if (xdr_stream_encode_u32(xdr, resp->len) < 0) + return 0; + xdr_write_pages(xdr, &resp->page, 0, resp->len); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) + return 0; + break; + } + return 1; } diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index a9b4ee6d098b..b868565c471f 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -94,6 +94,7 @@ struct nfsd_diropres { struct nfsd_readlinkres { __be32 status; int len; + struct page *page; }; struct nfsd_readres { From a6f8d9dc9e44b51303d9abde4643460137d19b28 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 16:40:11 -0400 Subject: [PATCH 26/70] NFSD: Update the NFSv2 READ result encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfsproc.c | 1 + fs/nfsd/nfsxdr.c | 32 +++++++++++++++----------------- fs/nfsd/xdr.h | 1 + 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index f55080482997..2088bb0887ba 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -185,6 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) v = 0; len = argp->count; + resp->pages = rqstp->rq_next_page; while (len > 0) { struct page *page = *(rqstp->rq_next_page++); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 74d9d11949c6..d6d7d07dbb1b 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -549,27 +549,25 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) int nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; - *p++ = resp->status; - if (resp->status != nfs_ok) - return xdr_ressize_check(rqstp, p); - - p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); - *p++ = htonl(resp->count); - xdr_ressize_check(rqstp, p); - - /* now update rqstp->rq_res to reflect data as well */ - rqstp->rq_res.page_len = resp->count; - if (resp->count & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); - } - if (svc_encode_result_payload(rqstp, head->iov_len, resp->count)) + if (!svcxdr_encode_stat(xdr, resp->status)) return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return 0; + xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, + resp->count); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) + return 0; + break; + } + return 1; } diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index b868565c471f..277b74c511ce 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -102,6 +102,7 @@ struct nfsd_readres { struct svc_fh fh; unsigned long count; struct kstat stat; + struct page **pages; }; struct nfsd_readdirres { From bf15229f2ced4f14946eef958336f764e30f8efb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 19:01:38 -0400 Subject: [PATCH 27/70] NFSD: Update the NFSv2 STATFS result encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfsxdr.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index d6d7d07dbb1b..39d296aecd3e 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -592,19 +592,26 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) int nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_statfsres *resp = rqstp->rq_resp; struct kstatfs *stat = &resp->stats; - *p++ = resp->status; - if (resp->status != nfs_ok) - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + p = xdr_reserve_space(xdr, XDR_UNIT * 5); + if (!p) + return 0; + *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2); + *p++ = cpu_to_be32(stat->f_bsize); + *p++ = cpu_to_be32(stat->f_blocks); + *p++ = cpu_to_be32(stat->f_bfree); + *p = cpu_to_be32(stat->f_bavail); + break; + } - *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */ - *p++ = htonl(stat->f_bsize); - *p++ = htonl(stat->f_blocks); - *p++ = htonl(stat->f_bfree); - *p++ = htonl(stat->f_bavail); - return xdr_ressize_check(rqstp, p); + return 1; } int From d52532002ffa217ad3fa4c3ba86c95203d21dd21 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Nov 2020 16:53:17 -0500 Subject: [PATCH 28/70] NFSD: Add a helper that encodes NFSv3 directory offset cookies Refactor: Add helper function similar to nfs3svc_encode_cookie3(). Signed-off-by: Chuck Lever --- fs/nfsd/nfsproc.c | 3 +-- fs/nfsd/nfsxdr.c | 18 ++++++++++++++++-- fs/nfsd/xdr.h | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 2088bb0887ba..5a0dd6e23c85 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -595,8 +595,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) &resp->common, nfssvc_encode_entry); resp->count = resp->buffer - buffer; - if (resp->offset) - *resp->offset = htonl(offset); + nfssvc_encode_nfscookie(resp, offset); fh_put(&argp->fh); return rpc_success; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 39d296aecd3e..a87b21cfe0d0 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -614,6 +614,21 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) return 1; } +/** + * nfssvc_encode_nfscookie - Encode a directory offset cookie + * @resp: readdir result context + * @offset: offset cookie to encode + * + */ +void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset) +{ + if (!resp->offset) + return; + + *resp->offset = cpu_to_be32(offset); + resp->offset = NULL; +} + int nfssvc_encode_entry(void *ccdv, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) @@ -632,8 +647,7 @@ nfssvc_encode_entry(void *ccdv, const char *name, cd->common.err = nfserr_fbig; return -EINVAL; } - if (cd->offset) - *cd->offset = htonl(offset); + nfssvc_encode_nfscookie(cd, offset); /* truncate filename */ namlen = min(namlen, NFS2_MAXNAMLEN); diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 277b74c511ce..651de13e62fe 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -157,6 +157,7 @@ int nfssvc_encode_readres(struct svc_rqst *, __be32 *); int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *); int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *); +void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset); int nfssvc_encode_entry(void *, const char *name, int namlen, loff_t offset, u64 ino, unsigned int); From 8141d6a2bb6c655ff0c0b81ced80d9025f03e926 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Nov 2020 16:57:44 -0500 Subject: [PATCH 29/70] NFSD: Count bytes instead of pages in the NFSv2 READDIR encoder Clean up: Counting the bytes used by each returned directory entry seems less brittle to me than trying to measure consumed pages after the fact. Signed-off-by: Chuck Lever --- fs/nfsd/nfsproc.c | 4 ---- fs/nfsd/nfsxdr.c | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 5a0dd6e23c85..1acff9f4aaf1 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -578,14 +578,12 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) struct nfsd_readdirargs *argp = rqstp->rq_argp; struct nfsd_readdirres *resp = rqstp->rq_resp; loff_t offset; - __be32 *buffer; dprintk("nfsd: READDIR %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->count, argp->cookie); nfsd_init_dirlist_pages(rqstp, resp, argp->count); - buffer = resp->buffer; resp->offset = NULL; resp->common.err = nfs_ok; @@ -593,8 +591,6 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) offset = argp->cookie; resp->status = nfsd_readdir(rqstp, &argp->fh, &offset, &resp->common, nfssvc_encode_entry); - - resp->count = resp->buffer - buffer; nfssvc_encode_nfscookie(resp, offset); fh_put(&argp->fh); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index a87b21cfe0d0..8ae23ed6dc5d 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -584,7 +584,7 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) p = resp->buffer; *p++ = 0; /* no more entries */ *p++ = htonl((resp->common.err == nfserr_eof)); - rqstp->rq_res.page_len = (((unsigned long)p-1) & ~PAGE_MASK)+1; + rqstp->rq_res.page_len = resp->count << 2; return 1; } @@ -667,6 +667,7 @@ nfssvc_encode_entry(void *ccdv, const char *name, cd->offset = p; /* remember pointer */ *p++ = htonl(~0U); /* offset of next entry */ + cd->count += p - cd->buffer; cd->buflen = buflen; cd->buffer = p; cd->common.err = nfs_ok; From 94c8f8c682a6497af7ea71351b18f637c6337d42 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 16:49:01 -0400 Subject: [PATCH 30/70] NFSD: Update the NFSv2 READDIR result encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfsxdr.c | 22 +++++++++++++--------- fs/nfsd/xdr.h | 1 + 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 8ae23ed6dc5d..9522e5c5f49d 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -574,17 +574,21 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) int nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readdirres *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status != nfs_ok) - return xdr_ressize_check(rqstp, p); - - xdr_ressize_check(rqstp, p); - p = resp->buffer; - *p++ = 0; /* no more entries */ - *p++ = htonl((resp->common.err == nfserr_eof)); - rqstp->rq_res.page_len = resp->count << 2; + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + xdr_write_pages(xdr, &resp->page, 0, resp->count << 2); + /* no more entries */ + if (xdr_stream_encode_item_absent(xdr) < 0) + return 0; + if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) + return 0; + break; + } return 1; } diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 651de13e62fe..69a6efc71ecb 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -114,6 +114,7 @@ struct nfsd_readdirres { __be32 * buffer; int buflen; __be32 * offset; + struct page *page; }; struct nfsd_statfsres { From f5dcccd647da513a89f3b6ca392b0c1eb050b9fc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 14 Nov 2020 13:45:35 -0500 Subject: [PATCH 31/70] NFSD: Update the NFSv2 READDIR entry encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfsproc.c | 26 +++++++++++---- fs/nfsd/nfsxdr.c | 81 ++++++++++++++++++++++++++++++++++++++++++++--- fs/nfsd/xdr.h | 7 ++++ 3 files changed, 103 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 1acff9f4aaf1..86d438fbd576 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -559,14 +559,27 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, struct nfsd_readdirres *resp, int count) { + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + count = min_t(u32, count, PAGE_SIZE); - /* Convert byte count to number of words (i.e. >> 2), - * and reserve room for the NULL ptr & eof flag (-2 words) */ - resp->buflen = (count >> 2) - 2; + memset(buf, 0, sizeof(*buf)); - resp->buffer = page_address(*rqstp->rq_next_page); + /* Reserve room for the NULL ptr & eof flag (-2 words) */ + buf->buflen = count - sizeof(__be32) * 2; + buf->pages = rqstp->rq_next_page; rqstp->rq_next_page++; + + /* This is xdr_init_encode(), but it assumes that + * the head kvec has already been consumed. */ + xdr_set_scratch_buffer(xdr, NULL, 0); + xdr->buf = buf; + xdr->page_ptr = buf->pages; + xdr->iov = NULL; + xdr->p = page_address(*buf->pages); + xdr->end = xdr->p + (PAGE_SIZE >> 2); + xdr->rqst = NULL; } /* @@ -585,12 +598,11 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) nfsd_init_dirlist_pages(rqstp, resp, argp->count); - resp->offset = NULL; resp->common.err = nfs_ok; - /* Read directory and encode entries on the fly */ + resp->cookie_offset = 0; offset = argp->cookie; resp->status = nfsd_readdir(rqstp, &argp->fh, &offset, - &resp->common, nfssvc_encode_entry); + &resp->common, nfs2svc_encode_entry); nfssvc_encode_nfscookie(resp, offset); fh_put(&argp->fh); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 9522e5c5f49d..1102d40ded03 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -576,12 +576,13 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) { struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readdirres *resp = rqstp->rq_resp; + struct xdr_buf *dirlist = &resp->dirlist; if (!svcxdr_encode_stat(xdr, resp->status)) return 0; switch (resp->status) { case nfs_ok: - xdr_write_pages(xdr, &resp->page, 0, resp->count << 2); + xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) return 0; @@ -623,14 +624,86 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) * @resp: readdir result context * @offset: offset cookie to encode * + * The buffer space for the offset cookie has already been reserved + * by svcxdr_encode_entry_common(). */ void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset) { - if (!resp->offset) + __be32 cookie = cpu_to_be32(offset); + + if (!resp->cookie_offset) return; - *resp->offset = cpu_to_be32(offset); - resp->offset = NULL; + write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie, + sizeof(cookie)); + resp->cookie_offset = 0; +} + +static bool +svcxdr_encode_entry_common(struct nfsd_readdirres *resp, const char *name, + int namlen, loff_t offset, u64 ino) +{ + struct xdr_buf *dirlist = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + /* fileid */ + if (xdr_stream_encode_u32(xdr, (u32)ino) < 0) + return false; + /* name */ + if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS2_MAXNAMLEN)) < 0) + return false; + /* cookie */ + resp->cookie_offset = dirlist->len; + if (xdr_stream_encode_u32(xdr, ~0U) < 0) + return false; + + return true; +} + +/** + * nfs2svc_encode_entry - encode one NFSv2 READDIR entry + * @data: directory context + * @name: name of the object to be encoded + * @namlen: length of that name, in bytes + * @offset: the offset of the previous entry + * @ino: the fileid of this entry + * @d_type: unused + * + * Return values: + * %0: Entry was successfully encoded. + * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err + * + * On exit, the following fields are updated: + * - resp->xdr + * - resp->common.err + * - resp->cookie_offset + */ +int nfs2svc_encode_entry(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = data; + struct nfsd_readdirres *resp = container_of(ccd, + struct nfsd_readdirres, + common); + unsigned int starting_length = resp->dirlist.len; + + /* The offset cookie for the previous entry */ + nfssvc_encode_nfscookie(resp, offset); + + if (!svcxdr_encode_entry_common(resp, name, namlen, offset, ino)) + goto out_toosmall; + + xdr_commit_encode(&resp->xdr); + resp->common.err = nfs_ok; + return 0; + +out_toosmall: + resp->cookie_offset = 0; + resp->common.err = nfserr_toosmall; + resp->dirlist.len = starting_length; + return -EINVAL; } int diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 69a6efc71ecb..082f262a1bf9 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -106,15 +106,20 @@ struct nfsd_readres { }; struct nfsd_readdirres { + /* Components of the reply */ __be32 status; int count; + /* Used to encode the reply's entry list */ + struct xdr_stream xdr; + struct xdr_buf dirlist; struct readdir_cd common; __be32 * buffer; int buflen; __be32 * offset; struct page *page; + unsigned int cookie_offset; }; struct nfsd_statfsres { @@ -159,6 +164,8 @@ int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *); int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *); void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset); +int nfs2svc_encode_entry(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type); int nfssvc_encode_entry(void *, const char *name, int namlen, loff_t offset, u64 ino, unsigned int); From 8a2cf9f5709cc20a1114a7d22655928314fc86f8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Nov 2020 14:30:13 -0500 Subject: [PATCH 32/70] NFSD: Remove unused NFSv2 directory entry encoders Clean up. Signed-off-by: Chuck Lever --- fs/nfsd/nfsproc.c | 2 +- fs/nfsd/nfsxdr.c | 51 +++-------------------------------------------- fs/nfsd/xdr.h | 10 ++-------- 3 files changed, 6 insertions(+), 57 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 86d438fbd576..c2cd2984e41d 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -602,7 +602,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) resp->cookie_offset = 0; offset = argp->cookie; resp->status = nfsd_readdir(rqstp, &argp->fh, &offset, - &resp->common, nfs2svc_encode_entry); + &resp->common, nfssvc_encode_entry); nfssvc_encode_nfscookie(resp, offset); fh_put(&argp->fh); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 1102d40ded03..5df6f00d76fd 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -663,7 +663,7 @@ svcxdr_encode_entry_common(struct nfsd_readdirres *resp, const char *name, } /** - * nfs2svc_encode_entry - encode one NFSv2 READDIR entry + * nfssvc_encode_entry - encode one NFSv2 READDIR entry * @data: directory context * @name: name of the object to be encoded * @namlen: length of that name, in bytes @@ -680,8 +680,8 @@ svcxdr_encode_entry_common(struct nfsd_readdirres *resp, const char *name, * - resp->common.err * - resp->cookie_offset */ -int nfs2svc_encode_entry(void *data, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) +int nfssvc_encode_entry(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) { struct readdir_cd *ccd = data; struct nfsd_readdirres *resp = container_of(ccd, @@ -706,51 +706,6 @@ out_toosmall: return -EINVAL; } -int -nfssvc_encode_entry(void *ccdv, const char *name, - int namlen, loff_t offset, u64 ino, unsigned int d_type) -{ - struct readdir_cd *ccd = ccdv; - struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common); - __be32 *p = cd->buffer; - int buflen, slen; - - /* - dprintk("nfsd: entry(%.*s off %ld ino %ld)\n", - namlen, name, offset, ino); - */ - - if (offset > ~((u32) 0)) { - cd->common.err = nfserr_fbig; - return -EINVAL; - } - nfssvc_encode_nfscookie(cd, offset); - - /* truncate filename */ - namlen = min(namlen, NFS2_MAXNAMLEN); - slen = XDR_QUADLEN(namlen); - - if ((buflen = cd->buflen - slen - 4) < 0) { - cd->common.err = nfserr_toosmall; - return -EINVAL; - } - if (ino > ~((u32) 0)) { - cd->common.err = nfserr_fbig; - return -EINVAL; - } - *p++ = xdr_one; /* mark entry present */ - *p++ = htonl((u32) ino); /* file id */ - p = xdr_encode_array(p, name, namlen);/* name length & name */ - cd->offset = p; /* remember pointer */ - *p++ = htonl(~0U); /* offset of next entry */ - - cd->count += p - cd->buffer; - cd->buflen = buflen; - cd->buffer = p; - cd->common.err = nfs_ok; - return 0; -} - /* * XDR release functions */ diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 082f262a1bf9..bfffcb70a5f8 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -115,10 +115,6 @@ struct nfsd_readdirres { struct xdr_stream xdr; struct xdr_buf dirlist; struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; - struct page *page; unsigned int cookie_offset; }; @@ -164,10 +160,8 @@ int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *); int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *); void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset); -int nfs2svc_encode_entry(void *data, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type); -int nfssvc_encode_entry(void *, const char *name, - int namlen, loff_t offset, u64 ino, unsigned int); +int nfssvc_encode_entry(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type); void nfssvc_release_attrstat(struct svc_rqst *rqstp); void nfssvc_release_diropres(struct svc_rqst *rqstp); From 8edc0648880a151026fe625fa1b76772b5766f68 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Nov 2020 14:55:05 -0500 Subject: [PATCH 33/70] NFSD: Add an xdr_stream-based encoder for NFSv2/3 ACLs Signed-off-by: Chuck Lever --- fs/nfs_common/nfsacl.c | 71 ++++++++++++++++++++++++++++++++++++++++++ include/linux/nfsacl.h | 3 ++ 2 files changed, 74 insertions(+) diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 79c563c1a5e8..5a5bd85d08f8 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -136,6 +136,77 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, } EXPORT_SYMBOL_GPL(nfsacl_encode); +/** + * nfs_stream_encode_acl - Encode an NFSv3 ACL + * + * @xdr: an xdr_stream positioned to receive an encoded ACL + * @inode: inode of file whose ACL this is + * @acl: posix_acl to encode + * @encode_entries: whether to encode ACEs as well + * @typeflag: ACL type: NFS_ACL_DEFAULT or zero + * + * Return values: + * %false: The ACL could not be encoded + * %true: @xdr is advanced to the next available position + */ +bool nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode, + struct posix_acl *acl, int encode_entries, + int typeflag) +{ + const size_t elem_size = XDR_UNIT * 3; + u32 entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; + struct nfsacl_encode_desc nfsacl_desc = { + .desc = { + .elem_size = elem_size, + .array_len = encode_entries ? entries : 0, + .xcode = xdr_nfsace_encode, + }, + .acl = acl, + .typeflag = typeflag, + .uid = inode->i_uid, + .gid = inode->i_gid, + }; + struct nfsacl_simple_acl aclbuf; + unsigned int base; + int err; + + if (entries > NFS_ACL_MAX_ENTRIES) + return false; + if (xdr_stream_encode_u32(xdr, entries) < 0) + return false; + + if (encode_entries && acl && acl->a_count == 3) { + struct posix_acl *acl2 = &aclbuf.acl; + + /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is + * invoked in contexts where a memory allocation failure is + * fatal. Fortunately this fake ACL is small enough to + * construct on the stack. */ + posix_acl_init(acl2, 4); + + /* Insert entries in canonical order: other orders seem + to confuse Solaris VxFS. */ + acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ + acl2->a_entries[1] = acl->a_entries[1]; /* ACL_GROUP_OBJ */ + acl2->a_entries[2] = acl->a_entries[1]; /* ACL_MASK */ + acl2->a_entries[2].e_tag = ACL_MASK; + acl2->a_entries[3] = acl->a_entries[2]; /* ACL_OTHER */ + nfsacl_desc.acl = acl2; + } + + base = xdr_stream_pos(xdr); + if (!xdr_reserve_space(xdr, XDR_UNIT + + elem_size * nfsacl_desc.desc.array_len)) + return false; + err = xdr_encode_array2(xdr->buf, base, &nfsacl_desc.desc); + if (err) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(nfs_stream_encode_acl); + + struct nfsacl_decode_desc { struct xdr_array2_desc desc; unsigned int count; diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h index 0ba99c513649..8e76a79cdc6a 100644 --- a/include/linux/nfsacl.h +++ b/include/linux/nfsacl.h @@ -41,5 +41,8 @@ nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, extern bool nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt, struct posix_acl **pacl); +extern bool +nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag); #endif /* __LINUX_NFSACL_H */ From f8cba47344f794b54373189bec23195b51020faf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Nov 2020 14:38:47 -0500 Subject: [PATCH 34/70] NFSD: Update the NFSv2 GETACL result encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs2acl.c | 44 +++++++++++++++++--------------------------- fs/nfsd/nfsxdr.c | 24 ++++++++++++++++++++++-- fs/nfsd/xdr.h | 3 +++ 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 855e17772eba..4a15ae6fdbc2 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -242,51 +242,41 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) /* GETACL */ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; struct inode *inode; - struct kvec *head = rqstp->rq_res.head; - unsigned int base; - int n; int w; - *p++ = resp->status; - if (resp->status != nfs_ok) - return xdr_ressize_check(rqstp, p); - - /* - * Since this is version 2, the check for nfserr in - * nfsd_dispatch actually ensures the following cannot happen. - * However, it seems fragile to depend on that. - */ - if (dentry == NULL || d_really_is_negative(dentry)) + if (!svcxdr_encode_stat(xdr, resp->status)) return 0; + + if (dentry == NULL || d_really_is_negative(dentry)) + return 1; inode = d_inode(dentry); - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); - *p++ = htonl(resp->mask); - if (!xdr_ressize_check(rqstp, p)) + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + if (xdr_stream_encode_u32(xdr, resp->mask) < 0) return 0; - base = (char *)p - (char *)head->iov_base; rqstp->rq_res.page_len = w = nfsacl_size( (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { if (!*(rqstp->rq_next_page++)) - return 0; + return 1; w -= PAGE_SIZE; } - n = nfsacl_encode(&rqstp->rq_res, base, inode, - resp->acl_access, - resp->mask & NFS_ACL, 0); - if (n > 0) - n = nfsacl_encode(&rqstp->rq_res, base + n, inode, - resp->acl_default, - resp->mask & NFS_DFACL, - NFS_ACL_DEFAULT); - return (n > 0); + if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access, + resp->mask & NFS_ACL, 0)) + return 0; + if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default, + resp->mask & NFS_DFACL, NFS_ACL_DEFAULT)) + return 0; + + return 1; } static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 5df6f00d76fd..1fed3a8deb18 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -26,7 +26,16 @@ static const u32 nfs_ftypes[] = { * Basic NFSv2 data types (RFC 1094 Section 2.3) */ -static bool +/** + * svcxdr_encode_stat - Encode an NFSv2 status code + * @xdr: XDR stream + * @status: status value to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status) { __be32 *p; @@ -250,7 +259,18 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, return p; } -static bool +/** + * svcxdr_encode_fattr - Encode NFSv2 file attributes + * @rqstp: Context of a completed RPC transaction + * @xdr: XDR stream + * @fhp: File handle to encode + * @stat: Attributes to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct svc_fh *fhp, const struct kstat *stat) { diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index bfffcb70a5f8..ad7f7eabf41a 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -170,5 +170,8 @@ void nfssvc_release_readres(struct svc_rqst *rqstp); /* Helper functions for NFSv2 ACL code */ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat); bool svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp); +bool svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status); +bool svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp, const struct kstat *stat); #endif /* LINUX_NFSD_H */ From 778f068fa0c0846b650ebdb8795fd51b5badc332 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Nov 2020 14:47:56 -0500 Subject: [PATCH 35/70] NFSD: Update the NFSv2 SETACL result encoder to use struct xdr_stream The SETACL result encoder is exactly the same as the NFSv2 attrstatres decoder. Signed-off-by: Chuck Lever --- fs/nfsd/nfs2acl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 4a15ae6fdbc2..9c572ffa5e7b 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -365,8 +365,8 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { [ACLPROC2_SETACL] = { .pc_func = nfsacld_proc_setacl, .pc_decode = nfsaclsvc_decode_setaclargs, - .pc_encode = nfsaclsvc_encode_attrstatres, - .pc_release = nfsaclsvc_release_attrstat, + .pc_encode = nfssvc_encode_attrstatres, + .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd3_setaclargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, From 8d2009a10b3abaa12a39deb4876b215714993fe8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Nov 2020 14:49:57 -0500 Subject: [PATCH 36/70] NFSD: Update the NFSv2 ACL GETATTR result encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs2acl.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 9c572ffa5e7b..9270530a0c2f 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -279,19 +279,6 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) return 1; } -static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) -{ - struct nfsd_attrstat *resp = rqstp->rq_resp; - - *p++ = resp->status; - if (resp->status != nfs_ok) - goto out; - - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); -out: - return xdr_ressize_check(rqstp, p); -} - /* ACCESS */ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) { @@ -319,13 +306,6 @@ static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp) posix_acl_release(resp->acl_default); } -static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp) -{ - struct nfsd_attrstat *resp = rqstp->rq_resp; - - fh_put(&resp->fh); -} - static void nfsaclsvc_release_access(struct svc_rqst *rqstp) { struct nfsd3_accessres *resp = rqstp->rq_resp; @@ -376,8 +356,8 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { [ACLPROC2_GETATTR] = { .pc_func = nfsacld_proc_getattr, .pc_decode = nfssvc_decode_fhandleargs, - .pc_encode = nfsaclsvc_encode_attrstatres, - .pc_release = nfsaclsvc_release_attrstat, + .pc_encode = nfssvc_encode_attrstatres, + .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, From 07f5c2963c04b11603e9667f89bb430c132e9cc1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Nov 2020 14:52:09 -0500 Subject: [PATCH 37/70] NFSD: Update the NFSv2 ACL ACCESS result encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs2acl.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 9270530a0c2f..4b43929c1f25 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -282,16 +282,21 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) /* ACCESS */ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_accessres *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status != nfs_ok) - goto out; + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + if (xdr_stream_encode_u32(xdr, resp->access) < 0) + return 0; + break; + } - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); - *p++ = htonl(resp->access); -out: - return xdr_ressize_check(rqstp, p); + return 1; } /* From 83d0b84572775a29f800de67a1b9b642a5376bc3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Nov 2020 14:31:42 -0500 Subject: [PATCH 38/70] NFSD: Clean up after updating NFSv2 ACL encoders Signed-off-by: Chuck Lever --- fs/nfsd/nfsxdr.c | 64 ------------------------------------------------ fs/nfsd/xdr.h | 1 - 2 files changed, 65 deletions(-) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 1fed3a8deb18..b800cfefcab7 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -201,64 +201,6 @@ svcxdr_decode_sattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, return true; } -static __be32 * -encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, - struct kstat *stat) -{ - struct user_namespace *userns = nfsd_user_namespace(rqstp); - struct dentry *dentry = fhp->fh_dentry; - int type; - struct timespec64 time; - u32 f; - - type = (stat->mode & S_IFMT); - - *p++ = htonl(nfs_ftypes[type >> 12]); - *p++ = htonl((u32) stat->mode); - *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); - *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); - - if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { - *p++ = htonl(NFS_MAXPATHLEN); - } else { - *p++ = htonl((u32) stat->size); - } - *p++ = htonl((u32) stat->blksize); - if (S_ISCHR(type) || S_ISBLK(type)) - *p++ = htonl(new_encode_dev(stat->rdev)); - else - *p++ = htonl(0xffffffff); - *p++ = htonl((u32) stat->blocks); - switch (fsid_source(fhp)) { - default: - case FSIDSOURCE_DEV: - *p++ = htonl(new_encode_dev(stat->dev)); - break; - case FSIDSOURCE_FSID: - *p++ = htonl((u32) fhp->fh_export->ex_fsid); - break; - case FSIDSOURCE_UUID: - f = ((u32*)fhp->fh_export->ex_uuid)[0]; - f ^= ((u32*)fhp->fh_export->ex_uuid)[1]; - f ^= ((u32*)fhp->fh_export->ex_uuid)[2]; - f ^= ((u32*)fhp->fh_export->ex_uuid)[3]; - *p++ = htonl(f); - break; - } - *p++ = htonl((u32) stat->ino); - *p++ = htonl((u32) stat->atime.tv_sec); - *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0); - time = stat->mtime; - lease_get_mtime(d_inode(dentry), &time); - *p++ = htonl((u32) time.tv_sec); - *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); - *p++ = htonl((u32) stat->ctime.tv_sec); - *p++ = htonl(stat->ctime.tv_nsec ? stat->ctime.tv_nsec / 1000 : 0); - - return p; -} - /** * svcxdr_encode_fattr - Encode NFSv2 file attributes * @rqstp: Context of a completed RPC transaction @@ -328,12 +270,6 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, return true; } -/* Helper function for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) -{ - return encode_fattr(rqstp, p, fhp, stat); -} - /* * XDR decode functions */ diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index ad7f7eabf41a..f45b4bc93f52 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -168,7 +168,6 @@ void nfssvc_release_diropres(struct svc_rqst *rqstp); void nfssvc_release_readres(struct svc_rqst *rqstp); /* Helper functions for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat); bool svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp); bool svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status); bool svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, From 20798dfe249a01ad1b12eec7dbc572db5003244a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Nov 2020 16:11:42 -0500 Subject: [PATCH 39/70] NFSD: Update the NFSv3 GETACL result encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3acl.c | 33 +++++++++++++++++++-------------- fs/nfsd/nfs3xdr.c | 23 +++++++++++++++++++++-- fs/nfsd/xdr3.h | 3 +++ 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 9a6f18d74d14..11991026ab3a 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -168,22 +168,25 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) /* GETACL */ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; + struct kvec *head = rqstp->rq_res.head; + struct inode *inode = d_inode(dentry); + unsigned int base; + int n; + int w; - *p++ = resp->status; - p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0 && dentry && d_really_is_positive(dentry)) { - struct inode *inode = d_inode(dentry); - struct kvec *head = rqstp->rq_res.head; - unsigned int base; - int n; - int w; - - *p++ = htonl(resp->mask); - if (!xdr_ressize_check(rqstp, p)) + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) return 0; - base = (char *)p - (char *)head->iov_base; + if (xdr_stream_encode_u32(xdr, resp->mask) < 0) + return 0; + + base = (char *)xdr->p - (char *)head->iov_base; rqstp->rq_res.page_len = w = nfsacl_size( (resp->mask & NFS_ACL) ? resp->acl_access : NULL, @@ -204,9 +207,11 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) NFS_ACL_DEFAULT); if (n <= 0) return 0; - } else - if (!xdr_ressize_check(rqstp, p)) + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) return 0; + } return 1; } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 646bbfc5b779..941740a97f8f 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -107,7 +107,16 @@ svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp) return true; } -static bool +/** + * svcxdr_encode_nfsstat3 - Encode an NFSv3 status code + * @xdr: XDR stream + * @status: status value to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status) { __be32 *p; @@ -464,7 +473,17 @@ svcxdr_encode_pre_op_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) return svcxdr_encode_wcc_attr(xdr, fhp); } -static bool +/** + * svcxdr_encode_post_op_attr - Encode NFSv3 post-op attributes + * @rqstp: Context of a completed RPC transaction + * @xdr: XDR stream + * @fhp: File handle to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct svc_fh *fhp) { diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index b851458373db..746c5f79964f 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -308,5 +308,8 @@ int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, __be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp); bool svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp); +bool svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status); +bool svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp); #endif /* _LINUX_NFSD_XDR3_H */ From 15e432bf0cfd1e6aebfa9ffd4e0cc2ff4f3ae2db Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Nov 2020 16:21:24 -0500 Subject: [PATCH 40/70] NFSD: Update the NFSv3 SETACL result encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- fs/nfsd/nfs3acl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 11991026ab3a..a1591feeea22 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -219,11 +219,11 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) /* SETACL */ static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; - *p++ = resp->status; - p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh); } /* From 1416f435303d81070c6bcf5a4a9b4ed0f7a9f013 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Nov 2020 15:09:16 -0500 Subject: [PATCH 41/70] NFSD: Clean up after updating NFSv3 ACL encoders Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 86 ----------------------------------------------- fs/nfsd/xdr3.h | 2 -- 2 files changed, 88 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 941740a97f8f..fcfa0d611b93 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -48,13 +48,6 @@ static const u32 nfs3_ftypes[] = { * Basic NFSv3 data types (RFC 1813 Sections 2.5 and 2.6) */ -static __be32 * -encode_time3(__be32 *p, struct timespec64 *time) -{ - *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); - return p; -} - static __be32 * encode_nfstime3(__be32 *p, const struct timespec64 *time) { @@ -396,54 +389,6 @@ svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, return true; } -static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp) -{ - u64 f; - switch(fsid_source(fhp)) { - default: - case FSIDSOURCE_DEV: - p = xdr_encode_hyper(p, (u64)huge_encode_dev - (fhp->fh_dentry->d_sb->s_dev)); - break; - case FSIDSOURCE_FSID: - p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); - break; - case FSIDSOURCE_UUID: - f = ((u64*)fhp->fh_export->ex_uuid)[0]; - f ^= ((u64*)fhp->fh_export->ex_uuid)[1]; - p = xdr_encode_hyper(p, f); - break; - } - return p; -} - -static __be32 * -encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, - struct kstat *stat) -{ - struct user_namespace *userns = nfsd_user_namespace(rqstp); - *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); - *p++ = htonl((u32) (stat->mode & S_IALLUGO)); - *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); - *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); - if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { - p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); - } else { - p = xdr_encode_hyper(p, (u64) stat->size); - } - p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); - *p++ = htonl((u32) MAJOR(stat->rdev)); - *p++ = htonl((u32) MINOR(stat->rdev)); - p = encode_fsid(p, fhp); - p = xdr_encode_hyper(p, stat->ino); - p = encode_time3(p, &stat->atime); - p = encode_time3(p, &stat->mtime); - p = encode_time3(p, &stat->ctime); - - return p; -} - static bool svcxdr_encode_wcc_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) { @@ -512,37 +457,6 @@ no_post_op_attrs: return xdr_stream_encode_item_absent(xdr) > 0; } -/* - * Encode post-operation attributes. - * The inode may be NULL if the call failed because of a stale file - * handle. In this case, no attributes are returned. - */ -static __be32 * -encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) -{ - struct dentry *dentry = fhp->fh_dentry; - if (!fhp->fh_no_wcc && dentry && d_really_is_positive(dentry)) { - __be32 err; - struct kstat stat; - - err = fh_getattr(fhp, &stat); - if (!err) { - *p++ = xdr_one; /* attributes follow */ - lease_get_mtime(d_inode(dentry), &stat.mtime); - return encode_fattr3(rqstp, p, fhp, &stat); - } - } - *p++ = xdr_zero; - return p; -} - -/* Helper for NFSv3 ACLs */ -__be32 * -nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) -{ - return encode_post_op_attr(rqstp, p, fhp); -} - /* * Encode weak cache consistency data */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 746c5f79964f..933008382bbe 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -305,8 +305,6 @@ int nfs3svc_encode_entry3(void *data, const char *name, int namlen, int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type); /* Helper functions for NFSv3 ACL code */ -__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, - struct svc_fh *fhp); bool svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp); bool svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status); bool svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, From 6019ce0742ca55d3e45279a19b07d1542747a098 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Mar 2021 13:57:40 -0500 Subject: [PATCH 42/70] NFSD: Add a tracepoint to record directory entry encoding Enable watching the progress of directory encoding to capture the timing of any issues with reading or encoding a directory. The new tracepoint captures dirent encoding for all NFS versions. For example, here's what a few NFSv4 directory entries might look like: nfsd-989 [002] 468.596265: nfsd_dirent: fh_hash=0x5d162594 ino=2 name=. nfsd-989 [002] 468.596267: nfsd_dirent: fh_hash=0x5d162594 ino=1 name=.. nfsd-989 [002] 468.596299: nfsd_dirent: fh_hash=0x5d162594 ino=3827 name=zlib.c nfsd-989 [002] 468.596325: nfsd_dirent: fh_hash=0x5d162594 ino=3811 name=xdiff nfsd-989 [002] 468.596351: nfsd_dirent: fh_hash=0x5d162594 ino=3810 name=xdiff-interface.h nfsd-989 [002] 468.596377: nfsd_dirent: fh_hash=0x5d162594 ino=3809 name=xdiff-interface.c Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 24 ++++++++++++++++++++++++ fs/nfsd/vfs.c | 9 ++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 92a0973dd671..27a93ebd1d80 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -391,6 +391,30 @@ DEFINE_EVENT(nfsd_err_class, nfsd_##name, \ DEFINE_NFSD_ERR_EVENT(read_err); DEFINE_NFSD_ERR_EVENT(write_err); +TRACE_EVENT(nfsd_dirent, + TP_PROTO(struct svc_fh *fhp, + u64 ino, + const char *name, + int namlen), + TP_ARGS(fhp, ino, name, namlen), + TP_STRUCT__entry( + __field(u32, fh_hash) + __field(u64, ino) + __field(int, len) + __dynamic_array(unsigned char, name, namlen) + ), + TP_fast_assign( + __entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0; + __entry->ino = ino; + __entry->len = namlen; + memcpy(__get_str(name), name, namlen); + __assign_str(name, name); + ), + TP_printk("fh_hash=0x%08x ino=%llu name=%.*s", + __entry->fh_hash, __entry->ino, + __entry->len, __get_str(name)) +) + #include "state.h" #include "filecache.h" #include "vfs.h" diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fd6be35a1642..15adf1f6ab21 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1968,8 +1968,9 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name, return 0; } -static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, - struct readdir_cd *cdp, loff_t *offsetp) +static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp, + nfsd_filldir_t func, struct readdir_cd *cdp, + loff_t *offsetp) { struct buffered_dirent *de; int host_err; @@ -2015,6 +2016,8 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, if (cdp->err != nfs_ok) break; + trace_nfsd_dirent(fhp, de->ino, de->name, de->namlen); + reclen = ALIGN(sizeof(*de) + de->namlen, sizeof(u64)); size -= reclen; @@ -2062,7 +2065,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, goto out_close; } - err = nfsd_buffered_readdir(file, func, cdp, offsetp); + err = nfsd_buffered_readdir(file, fhp, func, cdp, offsetp); if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ From 219a170502b3d597c52eeec088aee8fbf7b90da5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 5 Mar 2021 14:22:32 -0500 Subject: [PATCH 43/70] NFSD: Clean up NFSDDBG_FACILITY macro These are no longer needed because there are no dprintk() call sites in these files. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 3 --- fs/nfsd/nfsxdr.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index fcfa0d611b93..0a5ebc52e6a9 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -14,9 +14,6 @@ #include "netns.h" #include "vfs.h" -#define NFSDDBG_FACILITY NFSDDBG_XDR - - /* * Force construction of an empty post-op attr */ diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index b800cfefcab7..a06c05fe3b42 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -9,8 +9,6 @@ #include "xdr.h" #include "auth.h" -#define NFSDDBG_FACILITY NFSDDBG_XDR - /* * Mapping of S_IF* types to NFS file types */ From 7f7e7a4006f74b031718055a0751c70c2e3d5e7e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 2 Mar 2021 10:46:23 -0500 Subject: [PATCH 44/70] nfsd: helper for laundromat expiry calculations We do this same logic repeatedly, and it's easy to get the sense of the comparison wrong. Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 49 +++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3290d0a7bb95..1aa7d4ee7c15 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5338,6 +5338,22 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) return true; } +struct laundry_time { + time64_t cutoff; + time64_t new_timeo; +}; + +static bool state_expired(struct laundry_time *lt, time64_t last_refresh) +{ + time64_t time_remaining; + + if (last_refresh < lt->cutoff) + return true; + time_remaining = last_refresh - lt->cutoff; + lt->new_timeo = min(lt->new_timeo, time_remaining); + return false; +} + static time64_t nfs4_laundromat(struct nfsd_net *nn) { @@ -5347,14 +5363,16 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_ol_stateid *stp; struct nfsd4_blocked_lock *nbl; struct list_head *pos, *next, reaplist; - time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease; - time64_t t, new_timeo = nn->nfsd4_lease; + struct laundry_time lt = { + .cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease, + .new_timeo = nn->nfsd4_lease + }; struct nfs4_cpntf_state *cps; copy_stateid_t *cps_t; int i; if (clients_still_reclaiming(nn)) { - new_timeo = 0; + lt.new_timeo = 0; goto out; } nfsd4_end_grace(nn); @@ -5364,7 +5382,7 @@ nfs4_laundromat(struct nfsd_net *nn) idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && - cps->cpntf_time < cutoff) + state_expired(<, cps->cpntf_time)) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); @@ -5372,11 +5390,8 @@ nfs4_laundromat(struct nfsd_net *nn) spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_time > cutoff) { - t = clp->cl_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(<, clp->cl_time)) break; - } if (mark_client_expired_locked(clp)) { trace_nfsd_clid_expired(&clp->cl_clientid); continue; @@ -5393,11 +5408,8 @@ nfs4_laundromat(struct nfsd_net *nn) spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - if (dp->dl_time > cutoff) { - t = dp->dl_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(<, dp->dl_time)) break; - } WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } @@ -5413,11 +5425,8 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->close_lru)) { oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, oo_close_lru); - if (oo->oo_time > cutoff) { - t = oo->oo_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(<, oo->oo_time)) break; - } list_del_init(&oo->oo_close_lru); stp = oo->oo_last_closed_stid; oo->oo_last_closed_stid = NULL; @@ -5443,11 +5452,8 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); - if (nbl->nbl_time > cutoff) { - t = nbl->nbl_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(<, nbl->nbl_time)) break; - } list_move(&nbl->nbl_lru, &reaplist); list_del_init(&nbl->nbl_list); } @@ -5460,8 +5466,7 @@ nfs4_laundromat(struct nfsd_net *nn) free_blocked_lock(nbl); } out: - new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); - return new_timeo; + return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); } static struct workqueue_struct *laundry_wq; From f988a7b71d1e66e63f79cd59c763875347943a7a Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Fri, 12 Mar 2021 22:03:00 +0100 Subject: [PATCH 45/70] nfsd: Log client tracking type log message as info instead of warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `printk()`, by default, uses the log level warning, which leaves the user reading NFSD: Using UMH upcall client tracking operations. wondering what to do about it (`dmesg --level=warn`). Several client tracking methods are tried, and expected to fail. That’s why a message is printed only on success. It might be interesting for users to know the chosen method, so use info-level instead of debug-level. Cc: linux-nfs@vger.kernel.org Signed-off-by: Paul Menzel Signed-off-by: Chuck Lever --- fs/nfsd/nfs4recover.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 891395c6c7d3..6fedc49726bf 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -626,7 +626,7 @@ nfsd4_legacy_tracking_init(struct net *net) status = nfsd4_load_reboot_recovery_data(net); if (status) goto err; - printk("NFSD: Using legacy client tracking operations.\n"); + pr_info("NFSD: Using legacy client tracking operations.\n"); return 0; err: @@ -1028,7 +1028,7 @@ nfsd4_init_cld_pipe(struct net *net) status = __nfsd4_init_cld_pipe(net); if (!status) - printk("NFSD: Using old nfsdcld client tracking operations.\n"); + pr_info("NFSD: Using old nfsdcld client tracking operations.\n"); return status; } @@ -1605,7 +1605,7 @@ nfsd4_cld_tracking_init(struct net *net) nfs4_release_reclaim(nn); goto err_remove; } else - printk("NFSD: Using nfsdcld client tracking operations.\n"); + pr_info("NFSD: Using nfsdcld client tracking operations.\n"); return 0; err_remove: @@ -1864,7 +1864,7 @@ nfsd4_umh_cltrack_init(struct net *net) ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); kfree(grace_start); if (!ret) - printk("NFSD: Using UMH upcall client tracking operations.\n"); + pr_info("NFSD: Using UMH upcall client tracking operations.\n"); return ret; } From c6c7f2a84da459bcc3714044e74a9cb66de31039 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Mar 2021 16:08:47 -0500 Subject: [PATCH 46/70] nfsd: Ensure knfsd shuts down when the "nfsd" pseudofs is unmounted In order to ensure that knfsd threads don't linger once the nfsd pseudofs is unmounted (e.g. when the container is killed) we let nfsd_umount() shut down those threads and wait for them to exit. This also should ensure that we don't need to do a kernel mount of the pseudofs, since the thread lifetime is now limited by the lifetime of the filesystem. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/netns.h | 6 +++--- fs/nfsd/nfs4state.c | 8 +------- fs/nfsd/nfsctl.c | 14 ++------------ fs/nfsd/nfsd.h | 3 +-- fs/nfsd/nfssvc.c | 35 ++++++++++++++++++++++++++++++++++- 5 files changed, 41 insertions(+), 25 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index c330f5bd0cf3..a75abeb1e698 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -51,9 +51,6 @@ struct nfsd_net { bool grace_ended; time64_t boot_time; - /* internal mount of the "nfsd" pseudofilesystem: */ - struct vfsmount *nfsd_mnt; - struct dentry *nfsd_client_dir; /* @@ -130,6 +127,9 @@ struct nfsd_net { wait_queue_head_t ntf_wq; atomic_t ntf_refcnt; + /* Allow umount to wait for nfsd state cleanup */ + struct completion nfsd_shutdown_complete; + /* * clientid and stateid data for construction of net unique COPY * stateids. diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1aa7d4ee7c15..e011b407d007 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7326,14 +7326,9 @@ nfs4_state_start_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; - ret = get_nfsdfs(net); + ret = nfs4_state_create_net(net); if (ret) return ret; - ret = nfs4_state_create_net(net); - if (ret) { - mntput(nn->nfsd_mnt); - return ret; - } locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) @@ -7403,7 +7398,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - mntput(nn->nfsd_mnt); } void diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index ef86ed23af82..02ff7f762e2d 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1416,6 +1416,8 @@ static void nfsd_umount(struct super_block *sb) { struct net *net = sb->s_fs_info; + nfsd_shutdown_threads(net); + kill_litter_super(sb); put_net(net); } @@ -1428,18 +1430,6 @@ static struct file_system_type nfsd_fs_type = { }; MODULE_ALIAS_FS("nfsd"); -int get_nfsdfs(struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct vfsmount *mnt; - - mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); - nn->nfsd_mnt = mnt; - return 0; -} - #ifdef CONFIG_PROC_FS static int create_proc_exports_entry(void) { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 8bdc37aa2c2e..27c1308ffc2b 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -93,13 +93,12 @@ int nfsd_get_nrthreads(int n, int *, struct net *); int nfsd_set_nrthreads(int n, int *, struct net *); int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); +void nfsd_shutdown_threads(struct net *net); void nfsd_destroy(struct net *net); bool i_am_nfsd(void); -int get_nfsdfs(struct net *); - struct nfsdfs_client { struct kref cl_ref; void (*cl_release)(struct kref *kref); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d909e4956244..b2eef4112bc2 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -596,6 +596,37 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_module = THIS_MODULE, }; +static void nfsd_complete_shutdown(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + WARN_ON(!mutex_is_locked(&nfsd_mutex)); + + nn->nfsd_serv = NULL; + complete(&nn->nfsd_shutdown_complete); +} + +void nfsd_shutdown_threads(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; + + mutex_lock(&nfsd_mutex); + serv = nn->nfsd_serv; + if (serv == NULL) { + mutex_unlock(&nfsd_mutex); + return; + } + + svc_get(serv); + /* Kill outstanding nfsd threads */ + serv->sv_ops->svo_setup(serv, NULL, 0); + nfsd_destroy(net); + mutex_unlock(&nfsd_mutex); + /* Wait for shutdown of nfsd_serv to complete */ + wait_for_completion(&nn->nfsd_shutdown_complete); +} + bool i_am_nfsd(void) { return kthread_func(current) == nfsd; @@ -618,11 +649,13 @@ int nfsd_create_serv(struct net *net) &nfsd_thread_sv_ops); if (nn->nfsd_serv == NULL) return -ENOMEM; + init_completion(&nn->nfsd_shutdown_complete); nn->nfsd_serv->sv_maxconn = nn->max_connections; error = svc_bind(nn->nfsd_serv, net); if (error < 0) { svc_destroy(nn->nfsd_serv); + nfsd_complete_shutdown(net); return error; } @@ -671,7 +704,7 @@ void nfsd_destroy(struct net *net) svc_shutdown_net(nn->nfsd_serv, net); svc_destroy(nn->nfsd_serv); if (destroy) - nn->nfsd_serv = NULL; + nfsd_complete_shutdown(net); } int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) From 34a624931b8c12b435b5009edc5897e4630107bc Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Thu, 18 Mar 2021 21:22:21 +0100 Subject: [PATCH 47/70] nfsd: Fix typo "accesible" Trivial fix. Cc: linux-nfs@vger.kernel.org Signed-off-by: Ricardo Ribalda Signed-off-by: Chuck Lever --- fs/nfsd/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index d6cff5fbe705..5fa38ad9e7e3 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -99,7 +99,7 @@ config NFSD_BLOCKLAYOUT help This option enables support for the exporting pNFS block layouts in the kernel's NFS server. The pNFS block layout enables NFS - clients to directly perform I/O to block devices accesible to both + clients to directly perform I/O to block devices accessible to both the server and the clients. See RFC 5663 for more details. If unsure, say N. @@ -113,7 +113,7 @@ config NFSD_SCSILAYOUT help This option enables support for the exporting pNFS SCSI layouts in the kernel's NFS server. The pNFS SCSI layout enables NFS - clients to directly perform I/O to SCSI devices accesible to both + clients to directly perform I/O to SCSI devices accessible to both the server and the clients. See draft-ietf-nfsv4-scsi-layout for more details. @@ -127,7 +127,7 @@ config NFSD_FLEXFILELAYOUT This option enables support for the exporting pNFS Flex File layouts in the kernel's NFS server. The pNFS Flex File layout enables NFS clients to directly perform I/O to NFSv3 devices - accesible to both the server and the clients. See + accessible to both the server and the clients. See draft-ietf-nfsv4-flex-files for more details. Warning, this server implements the bare minimum functionality From 792a5112aa90e59c048b601c6382fe3498d75db7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 18 Mar 2021 20:03:23 -0400 Subject: [PATCH 48/70] nfsd: COPY with length 0 should copy to end of file >From https://tools.ietf.org/html/rfc7862#page-65 A count of 0 (zero) requests that all bytes from ca_src_offset through EOF be copied to the destination. Reported-by: Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c57cf09a8b60..d7b100a1e306 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1387,6 +1387,9 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) u64 src_pos = copy->cp_src_pos; u64 dst_pos = copy->cp_dst_pos; + /* See RFC 7862 p.67: */ + if (bytes_total == 0) + bytes_total = ULLONG_MAX; do { if (kthread_should_stop()) break; From e7a833e9cc6c3b58fe94f049d2b40943cba07086 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 18 Mar 2021 20:03:22 -0400 Subject: [PATCH 49/70] nfsd: don't ignore high bits of copy count Note size_t is 32-bit on a 32-bit architecture, but cp_count is defined by the protocol to be 64 bit, so we could be turning a large copy into a 0-length copy here. Reported-by: Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d7b100a1e306..edcb380fbf12 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1383,7 +1383,7 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) { ssize_t bytes_copied = 0; - size_t bytes_total = copy->cp_count; + u64 bytes_total = copy->cp_count; u64 src_pos = copy->cp_src_pos; u64 dst_pos = copy->cp_dst_pos; From 472d155a0631bd1a09b5c0c275a254e65605d683 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 20 Mar 2021 09:38:04 +1100 Subject: [PATCH 50/70] nfsd: report client confirmation status in "info" file mountd can now monitor clients appearing and disappearing in /proc/fs/nfsd/clients, and will log these events, in liu of the logging of mount/unmount events for NFSv3. Currently it cannot distinguish between unconfirmed clients (which might be transient and totally uninteresting) and confirmed clients. So add a "status: " line which reports either "confirmed" or "unconfirmed", and use fsnotify to report that the info file has been modified. This requires a bit of infrastructure to keep the dentry for the "info" file. There is no need to take a counted reference as the dentry must remain around until the client is removed. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 19 +++++++++++++++---- fs/nfsd/nfsctl.c | 14 ++++++++------ fs/nfsd/nfsd.h | 4 +++- fs/nfsd/state.h | 4 ++++ 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e011b407d007..7698172ac0c7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -2352,6 +2353,10 @@ static int client_info_show(struct seq_file *m, void *v) memcpy(&clid, &clp->cl_clientid, sizeof(clid)); seq_printf(m, "clientid: 0x%llx\n", clid); seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr); + if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + seq_puts(m, "status: confirmed\n"); + else + seq_puts(m, "status: unconfirmed\n"); seq_printf(m, "name: "); seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len); seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion); @@ -2702,6 +2707,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, int ret; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct dentry *dentries[ARRAY_SIZE(client_files)]; clp = alloc_client(name); if (clp == NULL) @@ -2721,9 +2727,11 @@ static struct nfs4_client *create_client(struct xdr_netobj name, memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; clp->net = net; - clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs, - clp->cl_clientid.cl_id - nn->clientid_base, - client_files); + clp->cl_nfsd_dentry = nfsd_client_mkdir( + nn, &clp->cl_nfsdfs, + clp->cl_clientid.cl_id - nn->clientid_base, + client_files, dentries); + clp->cl_nfsd_info_dentry = dentries[0]; if (!clp->cl_nfsd_dentry) { free_client(clp); return NULL; @@ -2798,7 +2806,10 @@ move_to_confirmed(struct nfs4_client *clp) list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); add_clp_to_name_tree(clp, &nn->conf_name_tree); - set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); + if (!test_and_set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags) && + clp->cl_nfsd_dentry && + clp->cl_nfsd_info_dentry) + fsnotify_dentry(clp->cl_nfsd_info_dentry, FS_MODIFY); renew_client_locked(clp); } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 02ff7f762e2d..853bf50a2a9b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1266,7 +1266,8 @@ static void nfsdfs_remove_files(struct dentry *root) /* XXX: cut'n'paste from simple_fill_super; figure out if we could share * code instead. */ static int nfsdfs_create_files(struct dentry *root, - const struct tree_descr *files) + const struct tree_descr *files, + struct dentry **fdentries) { struct inode *dir = d_inode(root); struct inode *inode; @@ -1275,8 +1276,6 @@ static int nfsdfs_create_files(struct dentry *root, inode_lock(dir); for (i = 0; files->name && files->name[0]; i++, files++) { - if (!files->name) - continue; dentry = d_alloc_name(root, files->name); if (!dentry) goto out; @@ -1290,6 +1289,8 @@ static int nfsdfs_create_files(struct dentry *root, inode->i_private = __get_nfsdfs_client(dir); d_add(dentry, inode); fsnotify_create(dir, dentry); + if (fdentries) + fdentries[i] = dentry; } inode_unlock(dir); return 0; @@ -1301,8 +1302,9 @@ out: /* on success, returns positive number unique to that client. */ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, - struct nfsdfs_client *ncl, u32 id, - const struct tree_descr *files) + struct nfsdfs_client *ncl, u32 id, + const struct tree_descr *files, + struct dentry **fdentries) { struct dentry *dentry; char name[11]; @@ -1313,7 +1315,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name); if (IS_ERR(dentry)) /* XXX: tossing errors? */ return NULL; - ret = nfsdfs_create_files(dentry, files); + ret = nfsdfs_create_files(dentry, files, fdentries); if (ret) { nfsd_client_rmdir(dentry); return NULL; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 27c1308ffc2b..14dbfa75059d 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -106,7 +106,9 @@ struct nfsdfs_client { struct nfsdfs_client *get_nfsdfs_client(struct inode *); struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, - struct nfsdfs_client *ncl, u32 id, const struct tree_descr *); + struct nfsdfs_client *ncl, u32 id, + const struct tree_descr *, + struct dentry **fdentries); void nfsd_client_rmdir(struct dentry *dentry); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 73deea353169..54cab651ac1d 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -371,6 +371,10 @@ struct nfs4_client { /* debugging info directory under nfsd/clients/ : */ struct dentry *cl_nfsd_dentry; + /* 'info' file within that directory. Ref is not counted, + * but will remain valid iff cl_nfsd_dentry != NULL + */ + struct dentry *cl_nfsd_info_dentry; /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ From 072db263e1dd2efa29bfc2ae84b88042502c5d54 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 20 Feb 2021 19:11:55 -0500 Subject: [PATCH 51/70] svcrdma: RPCDBG_FACILITY is no longer used Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 -- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 7d34290e2ff8..215d2adadbdd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -107,8 +107,6 @@ #include "xprt_rdma.h" #include -#define RPCDBG_FACILITY RPCDBG_SVCXPRT - static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc); static inline struct svc_rdma_recv_ctxt * diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 52c759a8543e..e6fab5dd20d0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -111,8 +111,6 @@ #include "xprt_rdma.h" #include -#define RPCDBG_FACILITY RPCDBG_SVCXPRT - static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc); static inline struct svc_rdma_send_ctxt * From 270f25edcc164ab045c94a1bb645080f26a81ce2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Mar 2021 13:34:38 -0500 Subject: [PATCH 52/70] svcrdma: Provide an explanatory comment in CMA event handler Clean up: explain why svc_xprt_enqueue() is invoked in the event handler even though no xpt_flags bits are toggled here. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index c895f80df659..046a07da5cf9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -279,6 +279,9 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id, switch (event->event) { case RDMA_CM_EVENT_ESTABLISHED: clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags); + + /* Handle any requests that were received while + * CONN_PENDING was set. */ svc_xprt_enqueue(xprt); break; case RDMA_CM_EVENT_DISCONNECTED: From c6b7ed8f946fe03b45d2cf3d2b5452d831e6f237 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Mar 2021 13:49:25 -0500 Subject: [PATCH 53/70] svcrdma: Remove stale comment for svc_rdma_wc_receive() xprt pinning was removed in commit 365e9992b90f ("svcrdma: Remove transport reference counting"), but this comment was not updated to reflect that change. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 215d2adadbdd..04148a656b2a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -322,8 +322,6 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) * @cq: Completion Queue context * @wc: Work Completion object * - * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that - * the Receive completion handler could be running. */ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) { From 77f0a2aa5cdde0524eab745f7a117706d3e3014f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Mar 2021 13:54:34 -0500 Subject: [PATCH 54/70] svcrdma: Add a batch Receive posting mechanism Introduce a server-side mechanism similar to commit e340c2d6ef2a ("xprtrdma: Reduce the doorbell rate (Receive)") to post Receive WRs in batch. Its first consumer is svc_rdma_post_recvs(), which posts the initial set of Receive WRs. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 56 ++++++++++++++++++------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 04148a656b2a..0c6aa8693f20 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -264,6 +264,47 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp) svc_rdma_recv_ctxt_put(rdma, ctxt); } +static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, + unsigned int wanted, bool temp) +{ + const struct ib_recv_wr *bad_wr = NULL; + struct svc_rdma_recv_ctxt *ctxt; + struct ib_recv_wr *recv_chain; + int ret; + + if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) + return false; + + recv_chain = NULL; + while (wanted--) { + ctxt = svc_rdma_recv_ctxt_get(rdma); + if (!ctxt) + break; + + trace_svcrdma_post_recv(ctxt); + ctxt->rc_temp = temp; + ctxt->rc_recv_wr.next = recv_chain; + recv_chain = &ctxt->rc_recv_wr; + } + if (!recv_chain) + return false; + + ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr); + if (ret) + goto err_free; + return true; + +err_free: + trace_svcrdma_rq_post_err(rdma, ret); + while (bad_wr) { + ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt, + rc_recv_wr); + bad_wr = bad_wr->next; + svc_rdma_recv_ctxt_put(rdma, ctxt); + } + return false; +} + static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt) { @@ -301,20 +342,7 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma) */ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) { - struct svc_rdma_recv_ctxt *ctxt; - unsigned int i; - int ret; - - for (i = 0; i < rdma->sc_max_requests; i++) { - ctxt = svc_rdma_recv_ctxt_get(rdma); - if (!ctxt) - return false; - ctxt->rc_temp = true; - ret = __svc_rdma_post_recv(rdma, ctxt); - if (ret) - return false; - } - return true; + return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true); } /** From 7b748c30cc046056a24c459de415844a856ea54b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Mar 2021 16:15:22 -0500 Subject: [PATCH 55/70] svcrdma: Use svc_rdma_refresh_recvs() in wc_receive Replace svc_rdma_post_recv() with the new batch receive mechanism. For the moment it is posting just a single Receive WR at a time, so no change in behavior is expected. Since svc_rdma_wc_receive() was the last call site for svc_rdma_post_recv(), it is removed. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 43 +++++++------------------ 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 0c6aa8693f20..1e7381ff948b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -305,35 +305,6 @@ err_free: return false; } -static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, - struct svc_rdma_recv_ctxt *ctxt) -{ - int ret; - - trace_svcrdma_post_recv(ctxt); - ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL); - if (ret) - goto err_post; - return 0; - -err_post: - trace_svcrdma_rq_post_err(rdma, ret); - svc_rdma_recv_ctxt_put(rdma, ctxt); - return ret; -} - -static int svc_rdma_post_recv(struct svcxprt_rdma *rdma) -{ - struct svc_rdma_recv_ctxt *ctxt; - - if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) - return 0; - ctxt = svc_rdma_recv_ctxt_get(rdma); - if (!ctxt) - return -ENOMEM; - return __svc_rdma_post_recv(rdma, ctxt); -} - /** * svc_rdma_post_recvs - Post initial set of Recv WRs * @rdma: fresh svcxprt_rdma @@ -364,8 +335,17 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS) goto flushed; - if (svc_rdma_post_recv(rdma)) - goto post_err; + /* If receive posting fails, the connection is about to be + * lost anyway. The server will not be able to send a reply + * for this RPC, and the client will retransmit this RPC + * anyway when it reconnects. + * + * Therefore we drop the Receive, even if status was SUCCESS + * to reduce the likelihood of replayed requests once the + * client reconnects. + */ + if (!svc_rdma_refresh_recvs(rdma, 1, false)) + goto flushed; /* All wc fields are now known to be valid */ ctxt->rc_byte_len = wc->byte_len; @@ -380,7 +360,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; flushed: -post_err: svc_rdma_recv_ctxt_put(rdma, ctxt); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); svc_xprt_enqueue(&rdma->sc_xprt); From c558d47596867ff1082fd7475b63670f63f7f5cf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Mar 2021 18:32:30 -0500 Subject: [PATCH 56/70] svcrdma: Maintain a Receive water mark Post more Receives when the number of pending Receives drops below a water mark. The batch mechanism is disabled if the underlying device cannot support a reasonably-sized Receive Queue. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 ++ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 10 ++++++++-- net/sunrpc/xprtrdma/svc_rdma_transport.c | 5 ++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 1e76ed688044..722fc7c48725 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -94,6 +94,8 @@ struct svcxprt_rdma { spinlock_t sc_rw_ctxt_lock; struct list_head sc_rw_ctxts; + u32 sc_pending_recvs; + u32 sc_recv_batch; struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 1e7381ff948b..2571188ef7f2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -285,6 +285,7 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, ctxt->rc_temp = temp; ctxt->rc_recv_wr.next = recv_chain; recv_chain = &ctxt->rc_recv_wr; + rdma->sc_pending_recvs++; } if (!recv_chain) return false; @@ -302,6 +303,8 @@ err_free: bad_wr = bad_wr->next; svc_rdma_recv_ctxt_put(rdma, ctxt); } + /* Since we're destroying the xprt, no need to reset + * sc_pending_recvs. */ return false; } @@ -328,6 +331,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_recv_ctxt *ctxt; + rdma->sc_pending_recvs--; + /* WARNING: Only wc->wr_cqe and wc->status are reliable */ ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); @@ -344,8 +349,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) * to reduce the likelihood of replayed requests once the * client reconnects. */ - if (!svc_rdma_refresh_recvs(rdma, 1, false)) - goto flushed; + if (rdma->sc_pending_recvs < rdma->sc_max_requests) + if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false)) + goto flushed; /* All wc fields are now known to be valid */ ctxt->rc_byte_len = wc->byte_len; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 046a07da5cf9..e629eacfedfc 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -407,11 +407,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_req_size = svcrdma_max_req_size; newxprt->sc_max_requests = svcrdma_max_requests; newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; - rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests; + newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH; + rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests + + newxprt->sc_recv_batch; if (rq_depth > dev->attrs.max_qp_wr) { pr_warn("svcrdma: reducing receive depth to %d\n", dev->attrs.max_qp_wr); rq_depth = dev->attrs.max_qp_wr; + newxprt->sc_recv_batch = 1; newxprt->sc_max_requests = rq_depth - 2; newxprt->sc_max_bc_requests = 2; } From e844d307d46cfa7e09cdb671941bfd5f1be86773 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 20 Feb 2021 18:53:40 -0500 Subject: [PATCH 57/70] svcrdma: Add a "deferred close" helper Refactor a bit of commonly used logic so that every site that wants a close deferred to an nfsd thread does all the right things (set_bit(XPT_CLOSE) then enqueue). Also, once XPT_CLOSE is set on a transport, it is never cleared. If XPT_CLOSE is already set, then the close is already being handled and the enqueue can be skipped. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 14 ++++++++++++++ net/sunrpc/svcsock.c | 15 ++++++--------- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 3 +-- net/sunrpc/xprtrdma/svc_rdma_rw.c | 5 ++--- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 10 ++++------ net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 ++---- 7 files changed, 30 insertions(+), 24 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 92455e0d5244..34dacadfe517 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -143,6 +143,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen); void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *xprt); void svc_age_temp_xprts_now(struct svc_serv *, struct sockaddr *); +void svc_xprt_deferred_close(struct svc_xprt *xprt); static inline void svc_xprt_get(struct svc_xprt *xprt) { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 3cdd71a8df1e..b134fc5f3b8d 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -139,6 +139,20 @@ int svc_print_xprts(char *buf, int maxlen) return len; } +/** + * svc_xprt_deferred_close - Close a transport + * @xprt: transport instance + * + * Used in contexts that need to defer the work of shutting down + * the transport to an nfsd thread. + */ +void svc_xprt_deferred_close(struct svc_xprt *xprt) +{ + if (!test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags)) + svc_xprt_enqueue(xprt); +} +EXPORT_SYMBOL_GPL(svc_xprt_deferred_close); + static void svc_xprt_free(struct kref *kref) { struct svc_xprt *xprt = diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2e2f007dfc9f..22454b2df5be 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -728,10 +728,8 @@ static void svc_tcp_state_change(struct sock *sk) rmb(); svsk->sk_ostate(sk); trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock); - if (sk->sk_state != TCP_ESTABLISHED) { - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - svc_xprt_enqueue(&svsk->sk_xprt); - } + if (sk->sk_state != TCP_ESTABLISHED) + svc_xprt_deferred_close(&svsk->sk_xprt); } } @@ -901,7 +899,7 @@ err_too_large: net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n", __func__, svsk->sk_xprt.xpt_server->sv_name, svc_sock_reclen(svsk)); - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); + svc_xprt_deferred_close(&svsk->sk_xprt); err_short: return -EAGAIN; } @@ -1057,7 +1055,7 @@ err_nuts: svsk->sk_datalen = 0; err_delete: trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len); - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); + svc_xprt_deferred_close(&svsk->sk_xprt); err_noclose: return 0; /* record not complete */ } @@ -1188,8 +1186,7 @@ out_close: xprt->xpt_server->sv_name, (err < 0) ? "got error" : "sent", (err < 0) ? err : sent, xdr->len); - set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_xprt_enqueue(xprt); + svc_xprt_deferred_close(xprt); atomic_dec(&svsk->sk_sendqlen); mutex_unlock(&xprt->xpt_mutex); return -EAGAIN; @@ -1268,7 +1265,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) case TCP_ESTABLISHED: break; default: - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); + svc_xprt_deferred_close(&svsk->sk_xprt); } } } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 2571188ef7f2..8d93d26e0318 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -367,8 +367,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) flushed: svc_rdma_recv_ctxt_put(rdma, ctxt); - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - svc_xprt_enqueue(&rdma->sc_xprt); + svc_xprt_deferred_close(&rdma->sc_xprt); } /** diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 693d139a8633..d7054e3a8e33 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -250,7 +250,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) wake_up(&rdma->sc_send_wait); if (unlikely(wc->status != IB_WC_SUCCESS)) - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_xprt_deferred_close(&rdma->sc_xprt); svc_rdma_write_info_free(info); } @@ -334,7 +334,6 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) { struct svcxprt_rdma *rdma = cc->cc_rdma; - struct svc_xprt *xprt = &rdma->sc_xprt; struct ib_send_wr *first_wr; const struct ib_send_wr *bad_wr; struct list_head *tmp; @@ -373,7 +372,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) } while (1); trace_svcrdma_sq_post_err(rdma, ret); - set_bit(XPT_CLOSE, &xprt->xpt_flags); + svc_xprt_deferred_close(&rdma->sc_xprt); /* If even one was posted, there will be a completion. */ if (bad_wr != first_wr) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index e6fab5dd20d0..4471a0fcd3a3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -285,10 +285,8 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) svc_rdma_send_ctxt_put(rdma, ctxt); - if (unlikely(wc->status != IB_WC_SUCCESS)) { - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - svc_xprt_enqueue(&rdma->sc_xprt); - } + if (unlikely(wc->status != IB_WC_SUCCESS)) + svc_xprt_deferred_close(&rdma->sc_xprt); } /** @@ -334,7 +332,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) } trace_svcrdma_sq_post_err(rdma, ret); - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_xprt_deferred_close(&rdma->sc_xprt); wake_up(&rdma->sc_send_wait); return ret; } @@ -994,7 +992,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) svc_rdma_send_ctxt_put(rdma, sctxt); err0: trace_svcrdma_send_err(rqstp, ret); - set_bit(XPT_CLOSE, &xprt->xpt_flags); + svc_xprt_deferred_close(&rdma->sc_xprt); return -ENOTCONN; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e629eacfedfc..3646216211c5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -119,8 +119,7 @@ static void qp_event_handler(struct ib_event *event, void *context) case IB_EVENT_QP_ACCESS_ERR: case IB_EVENT_DEVICE_FATAL: default: - set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_xprt_enqueue(xprt); + svc_xprt_deferred_close(xprt); break; } } @@ -286,8 +285,7 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id, break; case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_DEVICE_REMOVAL: - set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_xprt_enqueue(xprt); + svc_xprt_deferred_close(xprt); break; default: break; From 2a1e4f21d84184f7ff5768ee3d3d0c30b1135867 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 13 Jan 2021 13:57:18 -0500 Subject: [PATCH 58/70] svcrdma: Normalize Send page handling Currently svc_rdma_sendto() migrates xdr_buf pages into a separate page list and NULLs out a bunch of entries in rq_pages while the pages are under I/O. The Send completion handler then frees those pages later. Instead, let's wait for the Send completion, then handle page releasing in the nfsd thread. I'd like to avoid the cost of 250+ put_page() calls in the Send completion handler, which is single- threaded. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 8 +++++- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 29 ++++++++++++---------- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 722fc7c48725..5841978550c6 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -160,6 +160,7 @@ struct svc_rdma_send_ctxt { struct ib_send_wr sc_send_wr; struct ib_cqe sc_cqe; + struct completion sc_done; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; void *sc_xprt_buf; diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 9150df35fb6f..16897fcb659c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -93,7 +93,13 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, */ get_page(virt_to_page(rqst->rq_buffer)); sctxt->sc_send_wr.opcode = IB_WR_SEND; - return svc_rdma_send(rdma, sctxt); + ret = svc_rdma_send(rdma, sctxt); + if (ret < 0) + return ret; + + ret = wait_for_completion_killable(&sctxt->sc_done); + svc_rdma_send_ctxt_put(rdma, sctxt); + return ret; } /* Server-side transport endpoint wants a whole page for its send diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 4471a0fcd3a3..62d55850ca54 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -155,6 +155,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe; ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; + init_completion(&ctxt->sc_done); ctxt->sc_cqe.done = svc_rdma_wc_send; ctxt->sc_xprt_buf = buffer; xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, @@ -280,11 +281,11 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) trace_svcrdma_wc_send(wc, &ctxt->sc_cid); + complete(&ctxt->sc_done); + atomic_inc(&rdma->sc_sq_avail); wake_up(&rdma->sc_send_wait); - svc_rdma_send_ctxt_put(rdma, ctxt); - if (unlikely(wc->status != IB_WC_SUCCESS)) svc_xprt_deferred_close(&rdma->sc_xprt); } @@ -294,7 +295,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) * @rdma: transport on which to post the WR * @ctxt: send ctxt with a Send WR ready to post * - * Returns zero the Send WR was posted successfully. Otherwise, a + * Returns zero if the Send WR was posted successfully. Otherwise, a * negative errno is returned. */ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) @@ -302,7 +303,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) struct ib_send_wr *wr = &ctxt->sc_send_wr; int ret; - might_sleep(); + reinit_completion(&ctxt->sc_done); /* Sync the transport header buffer */ ib_dma_sync_single_for_device(rdma->sc_pd->device, @@ -795,7 +796,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt * so they are released by the Send completion handler. */ -static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, +static inline void svc_rdma_save_io_pages(struct svc_rqst *rqstp, struct svc_rdma_send_ctxt *ctxt) { int i, pages = rqstp->rq_next_page - rqstp->rq_respages; @@ -839,15 +840,20 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, if (ret < 0) return ret; - svc_rdma_save_io_pages(rqstp, sctxt); - if (rctxt->rc_inv_rkey) { sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey; } else { sctxt->sc_send_wr.opcode = IB_WR_SEND; } - return svc_rdma_send(rdma, sctxt); + + ret = svc_rdma_send(rdma, sctxt); + if (ret < 0) + return ret; + + ret = wait_for_completion_killable(&sctxt->sc_done); + svc_rdma_send_ctxt_put(rdma, sctxt); + return ret; } /** @@ -913,7 +919,8 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; if (svc_rdma_send(rdma, sctxt)) goto put_ctxt; - return; + + wait_for_completion_killable(&sctxt->sc_done); put_ctxt: svc_rdma_send_ctxt_put(rdma, sctxt); @@ -981,10 +988,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (ret != -E2BIG && ret != -EINVAL) goto err1; - /* Send completion releases payload pages that were part - * of previously posted RDMA Writes. - */ - svc_rdma_save_io_pages(rqstp, sctxt); svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret); return 0; From 579900670ac770a547ff607a60c02c56a7d27bd7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 28 Jan 2021 16:47:56 -0500 Subject: [PATCH 59/70] svcrdma: Remove unused sc_pages field Clean up. This significantly reduces the size of struct svc_rdma_send_ctxt. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 3 +-- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 25 ------------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 5841978550c6..6e621e1f56b8 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -164,9 +164,8 @@ struct svc_rdma_send_ctxt { struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; void *sc_xprt_buf; - int sc_page_count; int sc_cur_sge_no; - struct page *sc_pages[RPCSVC_MAXPAGES]; + struct ib_sge sc_sges[]; }; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 62d55850ca54..f093c9b536ff 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -219,7 +219,6 @@ out: ctxt->sc_send_wr.num_sge = 0; ctxt->sc_cur_sge_no = 0; - ctxt->sc_page_count = 0; return ctxt; out_empty: @@ -234,8 +233,6 @@ out_empty: * svc_rdma_send_ctxt_put - Return send_ctxt to free list * @rdma: controlling svcxprt_rdma * @ctxt: object to return to the free list - * - * Pages left in sc_pages are DMA unmapped and released. */ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) @@ -256,9 +253,6 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, ctxt->sc_sges[i].length); } - for (i = 0; i < ctxt->sc_page_count; ++i) - put_page(ctxt->sc_pages[i]); - spin_lock(&rdma->sc_send_lock); list_add(&ctxt->sc_list, &rdma->sc_send_ctxts); spin_unlock(&rdma->sc_send_lock); @@ -792,25 +786,6 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, svc_rdma_xb_dma_map, &args); } -/* The svc_rqst and all resources it owns are released as soon as - * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt - * so they are released by the Send completion handler. - */ -static inline void svc_rdma_save_io_pages(struct svc_rqst *rqstp, - struct svc_rdma_send_ctxt *ctxt) -{ - int i, pages = rqstp->rq_next_page - rqstp->rq_respages; - - ctxt->sc_page_count += pages; - for (i = 0; i < pages; i++) { - ctxt->sc_pages[i] = rqstp->rq_respages[i]; - rqstp->rq_respages[i] = NULL; - } - - /* Prevent svc_xprt_release from releasing pages in rq_pages */ - rqstp->rq_next_page = rqstp->rq_respages; -} - /* Prepare the portion of the RPC Reply that will be transmitted * via RDMA Send. The RPC-over-RDMA transport header is prepared * in sc_sges[0], and the RPC xdr_buf is prepared in following sges. From cc93ce9529a63bd67e1a64682b288db0092b34b6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Feb 2021 15:16:57 -0500 Subject: [PATCH 60/70] svcrdma: Retain the page backing rq_res.head[0].iov_base svc_rdma_sendto() now waits for the NIC hardware to finish with the pages backing rq_res. We still have to release the page array in some cases, but now it's always safe to immediately re-use the page backing rq_res's head buffer. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index f093c9b536ff..056452cabc98 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -957,6 +957,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); if (ret < 0) goto err1; + + /* Prevent svc_xprt_release() from releasing the page backing + * rq_res.head[0].iov_base. It's no longer being accessed by + * the I/O device. */ + rqstp->rq_respages++; return 0; err2: From 7dcfbd86adc45f6d6b37278efd22530cf80ab474 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 29 Jan 2021 13:04:04 -0500 Subject: [PATCH 61/70] SUNRPC: Export svc_xprt_received() Prepare svc_xprt_received() to be called from transport code instead of from generic RPC server code. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 1 + include/trace/events/sunrpc.h | 1 + net/sunrpc/svc_xprt.c | 13 +++++++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 34dacadfe517..571f605bc91e 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -130,6 +130,7 @@ void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int, const struct cred *); +void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 036eb1f5c133..bda16e9e6ba7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1781,6 +1781,7 @@ DECLARE_EVENT_CLASS(svc_xprt_event, ), \ TP_ARGS(xprt)) +DEFINE_SVC_XPRT_EVENT(received); DEFINE_SVC_XPRT_EVENT(no_write_space); DEFINE_SVC_XPRT_EVENT(close); DEFINE_SVC_XPRT_EVENT(detach); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b134fc5f3b8d..9d1374e82e90 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -247,21 +247,25 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return xprt; } -/* - * svc_xprt_received conditionally queues the transport for processing - * by another thread. The caller must hold the XPT_BUSY bit and must +/** + * svc_xprt_received - start next receiver thread + * @xprt: controlling transport + * + * The caller must hold the XPT_BUSY bit and must * not thereafter touch transport data. * * Note: XPT_DATA only gets cleared when a read-attempt finds no (or * insufficient) data. */ -static void svc_xprt_received(struct svc_xprt *xprt) +void svc_xprt_received(struct svc_xprt *xprt) { if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) { WARN_ONCE(1, "xprt=0x%p already busy!", xprt); return; } + trace_svc_xprt_received(xprt); + /* As soon as we clear busy, the xprt could be closed and * 'put', so we need a reference to call svc_enqueue_xprt with: */ @@ -271,6 +275,7 @@ static void svc_xprt_received(struct svc_xprt *xprt) xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); svc_xprt_put(xprt); } +EXPORT_SYMBOL_GPL(svc_xprt_received); void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) { From 82011c80b3ec0e05940a2ee2c76c1df9fd2b1ce8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 5 Jan 2021 10:15:09 -0500 Subject: [PATCH 62/70] SUNRPC: Move svc_xprt_received() call sites Currently, XPT_BUSY is not cleared until xpo_recvfrom returns. That effectively blocks the receipt and handling of the next RPC message until the current one has been taken off the transport. This strict ordering is a requirement for socket transports. For our kernel RPC/RDMA transport implementation, however, dequeuing an ingress message is nothing more than a list_del(). The transport can safely be marked un-busy as soon as that is done. To keep the changes simpler, this patch just moves the svc_xprt_received() call site from svc_handle_xprt() into the transports, so that the actual optimization can be done in a subsequent patch. Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 7 ++++--- net/sunrpc/svcsock.c | 9 +++++++-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 6 ++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 9d1374e82e90..42565f0c7d5a 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -820,8 +820,10 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) newxpt->xpt_cred = get_cred(xprt->xpt_cred); svc_add_new_temp_xprt(serv, newxpt); trace_svc_xprt_accept(newxpt, serv->sv_name); - } else + } else { module_put(xprt->xpt_class->xcl_owner); + } + svc_xprt_received(xprt); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", @@ -836,8 +838,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } - /* clear XPT_BUSY: */ - svc_xprt_received(xprt); out: trace_svc_handle_xprt(xprt, len); return len; @@ -1248,6 +1248,7 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp) rqstp->rq_xprt_hlen = dr->xprt_hlen; rqstp->rq_daddr = dr->daddr; rqstp->rq_respages = rqstp->rq_pages; + svc_xprt_received(rqstp->rq_xprt); return (dr->argslen<<2) - dr->xprt_hlen; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 22454b2df5be..9eb5b6b89077 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -519,6 +519,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->netudpcnt++; + svc_xprt_received(rqstp->rq_xprt); return len; out_recv_err: @@ -527,7 +528,7 @@ out_recv_err: set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); } trace_svcsock_udp_recv_err(&svsk->sk_xprt, err); - return 0; + goto out_clear_busy; out_cmsg_err: net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", cmh->cmsg_level, cmh->cmsg_type); @@ -536,6 +537,8 @@ out_bh_enable: local_bh_enable(); out_free: kfree_skb(skb); +out_clear_busy: + svc_xprt_received(rqstp->rq_xprt); return 0; } @@ -1033,6 +1036,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->nettcpcnt++; + svc_xprt_received(rqstp->rq_xprt); return rqstp->rq_arg.len; err_incomplete: @@ -1050,13 +1054,14 @@ error: if (len != -EAGAIN) goto err_delete; trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0); - return 0; + goto err_noclose; err_nuts: svsk->sk_datalen = 0; err_delete: trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len); svc_xprt_deferred_close(&svsk->sk_xprt); err_noclose: + svc_xprt_received(rqstp->rq_xprt); return 0; /* record not complete */ } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 8d93d26e0318..9cb5a09c4a01 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -846,6 +846,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) /* No new incoming requests, terminate the loop */ clear_bit(XPT_DATA, &xprt->xpt_flags); spin_unlock(&rdma_xprt->sc_rq_dto_lock); + svc_xprt_received(xprt); return 0; } list_del(&ctxt->rc_list); @@ -883,28 +884,33 @@ complete: rqstp->rq_xprt_ctxt = ctxt; rqstp->rq_prot = IPPROTO_MAX; svc_xprt_copy_addrs(rqstp, xprt); + svc_xprt_received(xprt); return rqstp->rq_arg.len; out_readlist: ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt); if (ret < 0) goto out_readfail; + svc_xprt_received(xprt); return 0; out_err: svc_rdma_send_error(rdma_xprt, ctxt, ret); svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); + svc_xprt_received(xprt); return 0; out_readfail: if (ret == -EINVAL) svc_rdma_send_error(rdma_xprt, ctxt, ret); svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); + svc_xprt_received(xprt); return ret; out_backchannel: svc_rdma_handle_bc_reply(rqstp, ctxt); out_drop: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); + svc_xprt_received(xprt); return 0; } From 7d81ee8722d69f753c88e7d594790fa10a384f1a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Dec 2020 13:22:20 -0500 Subject: [PATCH 63/70] svcrdma: Single-stage RDMA Read Currently the generic RPC server layer calls svc_rdma_recvfrom() twice to retrieve an RPC message that uses Read chunks. I'm not exactly sure why this design was chosen originally. Instead, let's wait for the Read chunk completion inline in the first call to svc_rdma_recvfrom(). The goal is to eliminate some page allocator churn. rdma_read_complete() replaces pages in the second svc_rqst by calling put_page() repeatedly while the upper layer waits for the request to be constructed, which adds unnecessary NFS WRITE round- trip latency. Signed-off-by: Chuck Lever Reviewed-by: Tom Talpey --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 10 ++- net/sunrpc/xprtrdma/svc_rdma_rw.c | 94 +++++++++---------------- 2 files changed, 37 insertions(+), 67 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 9cb5a09c4a01..ae932435e1d3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -853,6 +853,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) spin_unlock(&rdma_xprt->sc_rq_dto_lock); percpu_counter_inc(&svcrdma_stat_recv); + /* Unblock the transport for the next receive */ + svc_xprt_received(xprt); + ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device, ctxt->rc_recv_sge.addr, ctxt->rc_byte_len, DMA_FROM_DEVICE); @@ -884,33 +887,28 @@ complete: rqstp->rq_xprt_ctxt = ctxt; rqstp->rq_prot = IPPROTO_MAX; svc_xprt_copy_addrs(rqstp, xprt); - svc_xprt_received(xprt); return rqstp->rq_arg.len; out_readlist: ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt); if (ret < 0) goto out_readfail; - svc_xprt_received(xprt); - return 0; + goto complete; out_err: svc_rdma_send_error(rdma_xprt, ctxt, ret); svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); - svc_xprt_received(xprt); return 0; out_readfail: if (ret == -EINVAL) svc_rdma_send_error(rdma_xprt, ctxt, ret); svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); - svc_xprt_received(xprt); return ret; out_backchannel: svc_rdma_handle_bc_reply(rqstp, ctxt); out_drop: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); - svc_xprt_received(xprt); return 0; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index d7054e3a8e33..36959a14375e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -150,6 +150,8 @@ struct svc_rdma_chunk_ctxt { struct svcxprt_rdma *cc_rdma; struct list_head cc_rwctxts; int cc_sqecount; + enum ib_wc_status cc_status; + struct completion cc_done; }; static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma, @@ -299,29 +301,15 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); struct svcxprt_rdma *rdma = cc->cc_rdma; - struct svc_rdma_read_info *info = - container_of(cc, struct svc_rdma_read_info, ri_cc); trace_svcrdma_wc_read(wc, &cc->cc_cid); atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); wake_up(&rdma->sc_send_wait); - if (unlikely(wc->status != IB_WC_SUCCESS)) { - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt); - } else { - spin_lock(&rdma->sc_rq_dto_lock); - list_add_tail(&info->ri_readctxt->rc_list, - &rdma->sc_read_complete_q); - /* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */ - set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); - spin_unlock(&rdma->sc_rq_dto_lock); - - svc_xprt_enqueue(&rdma->sc_xprt); - } - - svc_rdma_read_info_free(info); + cc->cc_status = wc->status; + complete(&cc->cc_done); + return; } /* This function sleeps when the transport's Send Queue is congested. @@ -676,8 +664,8 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, struct svc_rdma_recv_ctxt *head = info->ri_readctxt; struct svc_rdma_chunk_ctxt *cc = &info->ri_cc; struct svc_rqst *rqstp = info->ri_rqst; - struct svc_rdma_rw_ctxt *ctxt; unsigned int sge_no, seg_len, len; + struct svc_rdma_rw_ctxt *ctxt; struct scatterlist *sg; int ret; @@ -693,8 +681,6 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, seg_len = min_t(unsigned int, len, PAGE_SIZE - info->ri_pageoff); - head->rc_arg.pages[info->ri_pageno] = - rqstp->rq_pages[info->ri_pageno]; if (!info->ri_pageoff) head->rc_page_count++; @@ -788,12 +774,10 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info, page_len = min_t(unsigned int, remaining, PAGE_SIZE - info->ri_pageoff); - head->rc_arg.pages[info->ri_pageno] = - rqstp->rq_pages[info->ri_pageno]; if (!info->ri_pageoff) head->rc_page_count++; - dst = page_address(head->rc_arg.pages[info->ri_pageno]); + dst = page_address(rqstp->rq_pages[info->ri_pageno]); memcpy(dst + info->ri_pageno, src + offset, page_len); info->ri_totalbytes += page_len; @@ -813,7 +797,7 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info, * svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks * @info: context for RDMA Reads * - * The chunk data lands in head->rc_arg as a series of contiguous pages, + * The chunk data lands in rqstp->rq_arg as a series of contiguous pages, * like an incoming TCP call. * * Return values: @@ -827,8 +811,8 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; const struct svc_rdma_pcl *pcl = &head->rc_read_pcl; + struct xdr_buf *buf = &info->ri_rqst->rq_arg; struct svc_rdma_chunk *chunk, *next; - struct xdr_buf *buf = &head->rc_arg; unsigned int start, length; int ret; @@ -864,9 +848,9 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf buf->len += info->ri_totalbytes; buf->buflen += info->ri_totalbytes; - head->rc_hdr_count = 1; - buf->head[0].iov_base = page_address(head->rc_pages[0]); + buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]); buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes); + buf->pages = &info->ri_rqst->rq_pages[1]; buf->page_len = info->ri_totalbytes - buf->head[0].iov_len; return 0; } @@ -875,9 +859,9 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf * svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks * @info: context for RDMA Reads * - * The chunk data lands in the page list of head->rc_arg.pages. + * The chunk data lands in the page list of rqstp->rq_arg.pages. * - * Currently NFSD does not look at the head->rc_arg.tail[0] iovec. + * Currently NFSD does not look at the rqstp->rq_arg.tail[0] kvec. * Therefore, XDR round-up of the Read chunk and trailing * inline content must both be added at the end of the pagelist. * @@ -891,7 +875,7 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf static int svc_rdma_read_data_item(struct svc_rdma_read_info *info) { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; - struct xdr_buf *buf = &head->rc_arg; + struct xdr_buf *buf = &info->ri_rqst->rq_arg; struct svc_rdma_chunk *chunk; unsigned int length; int ret; @@ -901,8 +885,6 @@ static int svc_rdma_read_data_item(struct svc_rdma_read_info *info) if (ret < 0) goto out; - head->rc_hdr_count = 0; - /* Split the Receive buffer between the head and tail * buffers at Read chunk's position. XDR roundup of the * chunk is not included in either the pagelist or in @@ -921,7 +903,8 @@ static int svc_rdma_read_data_item(struct svc_rdma_read_info *info) * Currently these chunks always start at page offset 0, * thus the rounded-up length never crosses a page boundary. */ - length = XDR_QUADLEN(info->ri_totalbytes) << 2; + buf->pages = &info->ri_rqst->rq_pages[0]; + length = xdr_align_size(chunk->ch_length); buf->page_len = length; buf->len += length; buf->buflen += length; @@ -1033,8 +1016,7 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) * @info: context for RDMA Reads * * The start of the data lands in the first page just after the - * Transport header, and the rest lands in the page list of - * head->rc_arg.pages. + * Transport header, and the rest lands in rqstp->rq_arg.pages. * * Assumptions: * - A PZRC is never sent in an RDMA_MSG message, though it's @@ -1049,8 +1031,7 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) */ static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info) { - struct svc_rdma_recv_ctxt *head = info->ri_readctxt; - struct xdr_buf *buf = &head->rc_arg; + struct xdr_buf *buf = &info->ri_rqst->rq_arg; int ret; ret = svc_rdma_read_call_chunk(info); @@ -1060,35 +1041,15 @@ static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info) buf->len += info->ri_totalbytes; buf->buflen += info->ri_totalbytes; - head->rc_hdr_count = 1; - buf->head[0].iov_base = page_address(head->rc_pages[0]); + buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]); buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes); + buf->pages = &info->ri_rqst->rq_pages[1]; buf->page_len = info->ri_totalbytes - buf->head[0].iov_len; out: return ret; } -/* Pages under I/O have been copied to head->rc_pages. Ensure they - * are not released by svc_xprt_release() until the I/O is complete. - * - * This has to be done after all Read WRs are constructed to properly - * handle a page that is part of I/O on behalf of two different RDMA - * segments. - * - * Do this only if I/O has been posted. Otherwise, we do indeed want - * svc_xprt_release() to clean things up properly. - */ -static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, - const unsigned int start, - const unsigned int num_pages) -{ - unsigned int i; - - for (i = start; i < num_pages + start; i++) - rqstp->rq_pages[i] = NULL; -} - /** * svc_rdma_process_read_list - Pull list of Read chunks from the client * @rdma: controlling RDMA transport @@ -1153,11 +1114,22 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, goto out_err; trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount); + init_completion(&cc->cc_done); ret = svc_rdma_post_chunk_ctxt(cc); if (ret < 0) goto out_err; - svc_rdma_save_io_pages(rqstp, 0, head->rc_page_count); - return 1; + + ret = 1; + wait_for_completion(&cc->cc_done); + if (cc->cc_status != IB_WC_SUCCESS) + ret = -EIO; + + /* rq_respages starts after the last arg page */ + rqstp->rq_respages = &rqstp->rq_pages[head->rc_page_count]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + + /* Ensure svc_rdma_recv_ctxt_put() does not try to release pages */ + head->rc_page_count = 0; out_err: svc_rdma_read_info_free(info); From 9af723be863904c746a6a6bf4f3686087b16b9ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 30 Dec 2020 12:43:34 -0500 Subject: [PATCH 64/70] svcrdma: Remove sc_read_complete_q Now that svc_rdma_recvfrom() waits for Read completion, sc_read_complete_q is no longer used. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 - net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 57 +++--------------------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 - 3 files changed, 6 insertions(+), 54 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 6e621e1f56b8..b72f75091404 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -106,7 +106,6 @@ struct svcxprt_rdma { wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - struct list_head sc_read_complete_q; struct work_struct sc_work; struct llist_head sc_recv_ctxts; @@ -140,7 +139,6 @@ struct svc_rdma_recv_ctxt { bool rc_temp; u32 rc_byte_len; unsigned int rc_page_count; - unsigned int rc_hdr_count; u32 rc_inv_rkey; __be32 rc_msgtype; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ae932435e1d3..88d80d701241 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -89,8 +89,7 @@ * svc_rdma_recvfrom call returns. * * During the second svc_rdma_recvfrom call, RDMA Read sink pages - * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst - * (see rdma_read_complete() below). + * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst. */ #include @@ -379,10 +378,6 @@ void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma) { struct svc_rdma_recv_ctxt *ctxt; - while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) { - list_del(&ctxt->rc_list); - svc_rdma_recv_ctxt_put(rdma, ctxt); - } while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) { list_del(&ctxt->rc_list); svc_rdma_recv_ctxt_put(rdma, ctxt); @@ -720,35 +715,6 @@ out_inval: return -EINVAL; } -static void rdma_read_complete(struct svc_rqst *rqstp, - struct svc_rdma_recv_ctxt *head) -{ - int page_no; - - /* Move Read chunk pages to rqstp so that they will be released - * when svc_process is done with them. - */ - for (page_no = 0; page_no < head->rc_page_count; page_no++) { - put_page(rqstp->rq_pages[page_no]); - rqstp->rq_pages[page_no] = head->rc_pages[page_no]; - } - head->rc_page_count = 0; - - /* Point rq_arg.pages past header */ - rqstp->rq_arg.pages = &rqstp->rq_pages[head->rc_hdr_count]; - rqstp->rq_arg.page_len = head->rc_arg.page_len; - - /* rq_respages starts after the last arg page */ - rqstp->rq_respages = &rqstp->rq_pages[page_no]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - - /* Rebuild rq_arg head and tail. */ - rqstp->rq_arg.head[0] = head->rc_arg.head[0]; - rqstp->rq_arg.tail[0] = head->rc_arg.tail[0]; - rqstp->rq_arg.len = head->rc_arg.len; - rqstp->rq_arg.buflen = head->rc_arg.buflen; -} - static void svc_rdma_send_error(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *rctxt, int status) @@ -834,13 +800,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; spin_lock(&rdma_xprt->sc_rq_dto_lock); - ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q); - if (ctxt) { - list_del(&ctxt->rc_list); - spin_unlock(&rdma_xprt->sc_rq_dto_lock); - rdma_read_complete(rqstp, ctxt); - goto complete; - } ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q); if (!ctxt) { /* No new incoming requests, terminate the loop */ @@ -880,21 +839,17 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) svc_rdma_get_inv_rkey(rdma_xprt, ctxt); if (!pcl_is_empty(&ctxt->rc_read_pcl) || - !pcl_is_empty(&ctxt->rc_call_pcl)) - goto out_readlist; + !pcl_is_empty(&ctxt->rc_call_pcl)) { + ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt); + if (ret < 0) + goto out_readfail; + } -complete: rqstp->rq_xprt_ctxt = ctxt; rqstp->rq_prot = IPPROTO_MAX; svc_xprt_copy_addrs(rqstp, xprt); return rqstp->rq_arg.len; -out_readlist: - ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt); - if (ret < 0) - goto out_readfail; - goto complete; - out_err: svc_rdma_send_error(rdma_xprt, ctxt, ret); svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 3646216211c5..d94b7759ada1 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -136,7 +136,6 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); - INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts); init_llist_head(&cma_xprt->sc_recv_ctxts); INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts); From 5533c4f4b996b7fc36d16b5e0807ebbc08c93af4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 13 Jan 2021 09:31:50 -0500 Subject: [PATCH 65/70] svcrdma: Remove svc_rdma_recv_ctxt::rc_pages and ::rc_arg These fields are no longer used. The size of struct svc_rdma_recv_ctxt is now less than 300 bytes on x86_64, down from 2440 bytes. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 3 --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 5 ----- net/sunrpc/xprtrdma/svc_rdma_rw.c | 12 ------------ 3 files changed, 20 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index b72f75091404..3184465de3a0 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -134,7 +134,6 @@ struct svc_rdma_recv_ctxt { struct rpc_rdma_cid rc_cid; struct ib_sge rc_recv_sge; void *rc_recv_buf; - struct xdr_buf rc_arg; struct xdr_stream rc_stream; bool rc_temp; u32 rc_byte_len; @@ -148,8 +147,6 @@ struct svc_rdma_recv_ctxt { struct svc_rdma_chunk *rc_cur_result_payload; struct svc_rdma_pcl rc_write_pcl; struct svc_rdma_pcl rc_reply_pcl; - - struct page *rc_pages[RPCSVC_MAXPAGES]; }; struct svc_rdma_send_ctxt { diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 88d80d701241..232860ea683b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -227,11 +227,6 @@ out_empty: void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt) { - unsigned int i; - - for (i = 0; i < ctxt->rc_page_count; i++) - put_page(ctxt->rc_pages[i]); - pcl_free(&ctxt->rc_call_pcl); pcl_free(&ctxt->rc_read_pcl); pcl_free(&ctxt->rc_write_pcl); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 36959a14375e..5238bc829235 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -1081,18 +1081,6 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc; int ret; - /* The request (with page list) is constructed in - * head->rc_arg. Pages involved with RDMA Read I/O are - * transferred there. - */ - head->rc_arg.head[0] = rqstp->rq_arg.head[0]; - head->rc_arg.tail[0] = rqstp->rq_arg.tail[0]; - head->rc_arg.pages = head->rc_pages; - head->rc_arg.page_base = 0; - head->rc_arg.page_len = 0; - head->rc_arg.len = rqstp->rq_arg.len; - head->rc_arg.buflen = rqstp->rq_arg.buflen; - info = svc_rdma_read_info_alloc(rdma); if (!info) return -ENOMEM; From e3eded5e81c4df60006e94614ec645da089e35e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Mar 2021 10:44:49 -0500 Subject: [PATCH 66/70] svcrdma: Clean up dto_q critical section in svc_rdma_recvfrom() This, to me, seems less cluttered and less redundant. I was hoping it could help reduce lock contention on the dto_q lock by reducing the size of the critical section, but alas, the only improvement is readability. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 232860ea683b..6be23ce7a93d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -794,22 +794,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; + ctxt = NULL; spin_lock(&rdma_xprt->sc_rq_dto_lock); ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q); - if (!ctxt) { + if (ctxt) + list_del(&ctxt->rc_list); + else /* No new incoming requests, terminate the loop */ clear_bit(XPT_DATA, &xprt->xpt_flags); - spin_unlock(&rdma_xprt->sc_rq_dto_lock); - svc_xprt_received(xprt); - return 0; - } - list_del(&ctxt->rc_list); spin_unlock(&rdma_xprt->sc_rq_dto_lock); - percpu_counter_inc(&svcrdma_stat_recv); /* Unblock the transport for the next receive */ svc_xprt_received(xprt); + if (!ctxt) + return 0; + percpu_counter_inc(&svcrdma_stat_recv); ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device, ctxt->rc_recv_sge.addr, ctxt->rc_byte_len, DMA_FROM_DEVICE); From c0a744dcaa29e9537e8607ae9c965ad936124a4d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 23 Mar 2021 17:48:58 -0500 Subject: [PATCH 67/70] UAPI: nfsfh.h: Replace one-element array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Use an anonymous union with a couple of anonymous structs in order to keep userspace unchanged: $ pahole -C nfs_fhbase_new fs/nfsd/nfsfh.o struct nfs_fhbase_new { union { struct { __u8 fb_version_aux; /* 0 1 */ __u8 fb_auth_type_aux; /* 1 1 */ __u8 fb_fsid_type_aux; /* 2 1 */ __u8 fb_fileid_type_aux; /* 3 1 */ __u32 fb_auth[1]; /* 4 4 */ }; /* 0 8 */ struct { __u8 fb_version; /* 0 1 */ __u8 fb_auth_type; /* 1 1 */ __u8 fb_fsid_type; /* 2 1 */ __u8 fb_fileid_type; /* 3 1 */ __u32 fb_auth_flex[0]; /* 4 0 */ }; /* 0 4 */ }; /* 0 8 */ /* size: 8, cachelines: 1, members: 1 */ /* last cacheline: 8 bytes */ }; Also, this helps with the ongoing efforts to enable -Warray-bounds by fixing the following warnings: fs/nfsd/nfsfh.c: In function ‘nfsd_set_fh_dentry’: fs/nfsd/nfsfh.c:191:41: warning: array subscript 1 is above array bounds of ‘__u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds] 191 | ntohl((__force __be32)fh->fh_fsid[1]))); | ~~~~~~~~~~~^~~ ./include/linux/kdev_t.h:12:46: note: in definition of macro ‘MKDEV’ 12 | #define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) | ^~ ./include/uapi/linux/byteorder/little_endian.h:40:26: note: in expansion of macro ‘__swab32’ 40 | #define __be32_to_cpu(x) __swab32((__force __u32)(__be32)(x)) | ^~~~~~~~ ./include/linux/byteorder/generic.h:136:21: note: in expansion of macro ‘__be32_to_cpu’ 136 | #define ___ntohl(x) __be32_to_cpu(x) | ^~~~~~~~~~~~~ ./include/linux/byteorder/generic.h:140:18: note: in expansion of macro ‘___ntohl’ 140 | #define ntohl(x) ___ntohl(x) | ^~~~~~~~ fs/nfsd/nfsfh.c:191:8: note: in expansion of macro ‘ntohl’ 191 | ntohl((__force __be32)fh->fh_fsid[1]))); | ^~~~~ fs/nfsd/nfsfh.c:192:32: warning: array subscript 2 is above array bounds of ‘__u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds] 192 | fh->fh_fsid[1] = fh->fh_fsid[2]; | ~~~~~~~~~~~^~~ fs/nfsd/nfsfh.c:192:15: warning: array subscript 1 is above array bounds of ‘__u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds] 192 | fh->fh_fsid[1] = fh->fh_fsid[2]; | ~~~~~~~~~~~^~~ [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.10/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/79 Link: https://github.com/KSPP/linux/issues/109 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Chuck Lever --- include/uapi/linux/nfsd/nfsfh.h | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h index ff0ca88b1c8f..427294dd56a1 100644 --- a/include/uapi/linux/nfsd/nfsfh.h +++ b/include/uapi/linux/nfsd/nfsfh.h @@ -64,13 +64,24 @@ struct nfs_fhbase_old { * in include/linux/exportfs.h for currently registered values. */ struct nfs_fhbase_new { - __u8 fb_version; /* == 1, even => nfs_fhbase_old */ - __u8 fb_auth_type; - __u8 fb_fsid_type; - __u8 fb_fileid_type; - __u32 fb_auth[1]; -/* __u32 fb_fsid[0]; floating */ -/* __u32 fb_fileid[0]; floating */ + union { + struct { + __u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */ + __u8 fb_auth_type_aux; + __u8 fb_fsid_type_aux; + __u8 fb_fileid_type_aux; + __u32 fb_auth[1]; + /* __u32 fb_fsid[0]; floating */ + /* __u32 fb_fileid[0]; floating */ + }; + struct { + __u8 fb_version; /* == 1, even => nfs_fhbase_old */ + __u8 fb_auth_type; + __u8 fb_fsid_type; + __u8 fb_fileid_type; + __u32 fb_auth_flex[]; /* flexible-array member */ + }; + }; }; struct knfsd_fh { @@ -97,7 +108,7 @@ struct knfsd_fh { #define fh_fsid_type fh_base.fh_new.fb_fsid_type #define fh_auth_type fh_base.fh_new.fb_auth_type #define fh_fileid_type fh_base.fh_new.fb_fileid_type -#define fh_fsid fh_base.fh_new.fb_auth +#define fh_fsid fh_base.fh_new.fb_auth_flex /* Do not use, provided for userspace compatiblity. */ #define fh_auth fh_base.fh_new.fb_auth From e739b12042b6b079a397a3c234f96c09d1de0b40 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 30 Mar 2021 15:03:59 -0400 Subject: [PATCH 68/70] NFSv4.2: fix copy stateid copying for the async copy This patch fixes Dan Carpenter's report that the static checker found a problem where memcpy() was copying into too small of a buffer. Reported-by: Dan Carpenter Fixes: e0639dc5805a ("NFSD introduce async copy feature") Signed-off-by: Olga Kornievskaia Signed-off-by: Chuck Lever Reviewed-by: Dai Ngo --- fs/nfsd/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index edcb380fbf12..daf43b980d4b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1541,8 +1541,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!nfs4_init_copy_state(nn, copy)) goto out_err; refcount_set(&async_copy->refcount, 1); - memcpy(©->cp_res.cb_stateid, ©->cp_stateid, - sizeof(copy->cp_stateid)); + memcpy(©->cp_res.cb_stateid, ©->cp_stateid.stid, + sizeof(copy->cp_res.cb_stateid)); dup_copy_fields(copy, async_copy); async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); From dee9f6ade389b71b8ef679454f6f4d00cd3075f1 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 6 Apr 2021 11:46:59 +0800 Subject: [PATCH 69/70] sunrpc: Remove unused function ip_map_lookup Fix the following clang warnings: net/sunrpc/svcauth_unix.c:306:30: warning: unused function 'ip_map_lookup' [-Wunused-function]. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Chuck Lever --- net/sunrpc/svcauth_unix.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 97c0bddba7a3..35b7966ac3b3 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -303,15 +303,6 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, return NULL; } -static inline struct ip_map *ip_map_lookup(struct net *net, char *class, - struct in6_addr *addr) -{ - struct sunrpc_net *sn; - - sn = net_generic(net, sunrpc_net_id); - return __ip_map_lookup(sn->ip_map_cache, class, addr); -} - static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time64_t expiry) { From b73ac6808b0f7994a05ebc38571e2e9eaf98a0f4 Mon Sep 17 00:00:00 2001 From: Guobin Huang Date: Tue, 6 Apr 2021 20:08:18 +0800 Subject: [PATCH 70/70] NFSD: Use DEFINE_SPINLOCK() for spinlock spinlock can be initialized automatically with DEFINE_SPINLOCK() rather than explicitly calling spin_lock_init(). Reported-by: Hulk Robot Signed-off-by: Guobin Huang Signed-off-by: Chuck Lever --- fs/nfsd/nfssvc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b2eef4112bc2..82ba034fa579 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -84,7 +84,7 @@ DEFINE_MUTEX(nfsd_mutex); * version 4.1 DRC caches. * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. */ -spinlock_t nfsd_drc_lock; +DEFINE_SPINLOCK(nfsd_drc_lock); unsigned long nfsd_drc_max_mem; unsigned long nfsd_drc_mem_used; @@ -563,7 +563,6 @@ static void set_max_drc(void) nfsd_drc_max_mem = (nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; nfsd_drc_mem_used = 0; - spin_lock_init(&nfsd_drc_lock); dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); }