A bunch of CephFS fixups from Xiubo, mostly around dropping caps, along
with a fix for a regression in the readahead handling code which sneaked
in with the switch to netfs helpers.

-----BEGIN PGP SIGNATURE-----
iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAmSoL3ITHGlkcnlvbW92
QGdtYWlsLmNvbQAKCRBKf944AhHzi5GdCACVzRsWU75gmO74yrOKOy2BR70Kgz2q
+uTAeXLYL57Q5Z2kREiLQQQsBhqkvkUcsE2kPZC40DIVP2554A8nBTnWLcdg//PM
6e94UVYMW66GqDeTYvCA2gD0V+uPnnDc5frrcxsNb2F1hxGFuO+tYMYDASJgmuuV
0gUKSqM5HbvFi30nM+RrNzOLPxr+/gMHahAVoM8uwuWN2LBFANADDY/7ya7JA4ZP
61BVF7jEDpb2btNUH1z4RfFVIIJE0IpJRH+bSb5d7CsrbrrkZhAh90QZaAGtIo7C
NhoZlT5fyQ57u4g4PM2UvoFJHeaxNMRb1JR73sN0FT8ngvw5Wb2HzaQb
=rFqz
-----END PGP SIGNATURE-----

Merge tag 'ceph-for-6.5-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "A bunch of CephFS fixups from Xiubo, mostly around dropping caps,
  along with a fix for a regression in the readahead handling code which
  sneaked in with the switch to netfs helpers"

* tag 'ceph-for-6.5-rc1' of https://github.com/ceph/ceph-client:
  ceph: don't let check_caps skip sending responses for revoke msgs
  ceph: issue a cap release immediately if no cap exists
  ceph: trigger to flush the buffer when making snapshot
  ceph: fix blindly expanding the readahead windows
  ceph: add a dedicated private data for netfs rreq
  ceph: voluntarily drop Xx caps for requests those touch parent mtime
  ceph: try to dump the msgs when decoding fails
  ceph: only send metrics when the MDS rank is ready
This commit is contained in:
Коммит
3290badd1b
|
@ -187,16 +187,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
|
|||
struct inode *inode = rreq->inode;
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
struct ceph_file_layout *lo = &ci->i_layout;
|
||||
unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
|
||||
loff_t end = rreq->start + rreq->len, new_end;
|
||||
struct ceph_netfs_request_data *priv = rreq->netfs_priv;
|
||||
unsigned long max_len;
|
||||
u32 blockoff;
|
||||
u64 blockno;
|
||||
|
||||
/* Expand the start downward */
|
||||
blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
|
||||
rreq->start = blockno * lo->stripe_unit;
|
||||
rreq->len += blockoff;
|
||||
if (priv) {
|
||||
/* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */
|
||||
if (priv->file_ra_disabled)
|
||||
max_pages = 0;
|
||||
else
|
||||
max_pages = priv->file_ra_pages;
|
||||
|
||||
/* Now, round up the length to the next block */
|
||||
rreq->len = roundup(rreq->len, lo->stripe_unit);
|
||||
}
|
||||
|
||||
/* Readahead is disabled */
|
||||
if (!max_pages)
|
||||
return;
|
||||
|
||||
max_len = max_pages << PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* Try to expand the length forward by rounding up it to the next
|
||||
* block, but do not exceed the file size, unless the original
|
||||
* request already exceeds it.
|
||||
*/
|
||||
new_end = min(round_up(end, lo->stripe_unit), rreq->i_size);
|
||||
if (new_end > end && new_end <= rreq->start + max_len)
|
||||
rreq->len = new_end - rreq->start;
|
||||
|
||||
/* Try to expand the start downward */
|
||||
div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
|
||||
if (rreq->len + blockoff <= max_len) {
|
||||
rreq->start -= blockoff;
|
||||
rreq->len += blockoff;
|
||||
}
|
||||
}
|
||||
|
||||
static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
|
||||
|
@ -362,18 +388,28 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
|
|||
{
|
||||
struct inode *inode = rreq->inode;
|
||||
int got = 0, want = CEPH_CAP_FILE_CACHE;
|
||||
struct ceph_netfs_request_data *priv;
|
||||
int ret = 0;
|
||||
|
||||
if (rreq->origin != NETFS_READAHEAD)
|
||||
return 0;
|
||||
|
||||
priv = kzalloc(sizeof(*priv), GFP_NOFS);
|
||||
if (!priv)
|
||||
return -ENOMEM;
|
||||
|
||||
if (file) {
|
||||
struct ceph_rw_context *rw_ctx;
|
||||
struct ceph_file_info *fi = file->private_data;
|
||||
|
||||
priv->file_ra_pages = file->f_ra.ra_pages;
|
||||
priv->file_ra_disabled = file->f_mode & FMODE_RANDOM;
|
||||
|
||||
rw_ctx = ceph_find_rw_context(fi);
|
||||
if (rw_ctx)
|
||||
if (rw_ctx) {
|
||||
rreq->netfs_priv = priv;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -383,27 +419,40 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
|
|||
ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got);
|
||||
if (ret < 0) {
|
||||
dout("start_read %p, error getting cap\n", inode);
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!(got & want)) {
|
||||
dout("start_read %p, no cache cap\n", inode);
|
||||
return -EACCES;
|
||||
ret = -EACCES;
|
||||
goto out;
|
||||
}
|
||||
if (ret == 0) {
|
||||
ret = -EACCES;
|
||||
goto out;
|
||||
}
|
||||
if (ret == 0)
|
||||
return -EACCES;
|
||||
|
||||
rreq->netfs_priv = (void *)(uintptr_t)got;
|
||||
return 0;
|
||||
priv->caps = got;
|
||||
rreq->netfs_priv = priv;
|
||||
|
||||
out:
|
||||
if (ret < 0)
|
||||
kfree(priv);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ceph_netfs_free_request(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct ceph_inode_info *ci = ceph_inode(rreq->inode);
|
||||
int got = (uintptr_t)rreq->netfs_priv;
|
||||
struct ceph_netfs_request_data *priv = rreq->netfs_priv;
|
||||
|
||||
if (got)
|
||||
ceph_put_cap_refs(ci, got);
|
||||
if (!priv)
|
||||
return;
|
||||
|
||||
if (priv->caps)
|
||||
ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps);
|
||||
kfree(priv);
|
||||
rreq->netfs_priv = NULL;
|
||||
}
|
||||
|
||||
const struct netfs_request_ops ceph_netfs_ops = {
|
||||
|
|
|
@ -3109,6 +3109,12 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
|
|||
}
|
||||
if (had & CEPH_CAP_FILE_WR) {
|
||||
if (--ci->i_wr_ref == 0) {
|
||||
/*
|
||||
* The Fb caps will always be took and released
|
||||
* together with the Fw caps.
|
||||
*/
|
||||
WARN_ON_ONCE(ci->i_wb_ref);
|
||||
|
||||
last++;
|
||||
check_flushsnaps = true;
|
||||
if (ci->i_wrbuffer_ref_head == 0 &&
|
||||
|
@ -3560,6 +3566,15 @@ static void handle_cap_grant(struct inode *inode,
|
|||
}
|
||||
BUG_ON(cap->issued & ~cap->implemented);
|
||||
|
||||
/* don't let check_caps skip sending a response to MDS for revoke msgs */
|
||||
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
|
||||
cap->mds_wanted = 0;
|
||||
if (cap == ci->i_auth_cap)
|
||||
check_caps = 1; /* check auth cap only */
|
||||
else
|
||||
check_caps = 2; /* check all caps */
|
||||
}
|
||||
|
||||
if (extra_info->inline_version > 0 &&
|
||||
extra_info->inline_version >= ci->i_inline_version) {
|
||||
ci->i_inline_version = extra_info->inline_version;
|
||||
|
@ -4086,6 +4101,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
|
|||
struct cap_extra_info extra_info = {};
|
||||
bool queue_trunc;
|
||||
bool close_sessions = false;
|
||||
bool do_cap_release = false;
|
||||
|
||||
dout("handle_caps from mds%d\n", session->s_mds);
|
||||
|
||||
|
@ -4192,17 +4208,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
|
|||
if (!inode) {
|
||||
dout(" i don't have ino %llx\n", vino.ino);
|
||||
|
||||
if (op == CEPH_CAP_OP_IMPORT) {
|
||||
cap = ceph_get_cap(mdsc, NULL);
|
||||
cap->cap_ino = vino.ino;
|
||||
cap->queue_release = 1;
|
||||
cap->cap_id = le64_to_cpu(h->cap_id);
|
||||
cap->mseq = mseq;
|
||||
cap->seq = seq;
|
||||
cap->issue_seq = seq;
|
||||
spin_lock(&session->s_cap_lock);
|
||||
__ceph_queue_cap_release(session, cap);
|
||||
spin_unlock(&session->s_cap_lock);
|
||||
switch (op) {
|
||||
case CEPH_CAP_OP_IMPORT:
|
||||
case CEPH_CAP_OP_REVOKE:
|
||||
case CEPH_CAP_OP_GRANT:
|
||||
do_cap_release = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
goto flush_cap_releases;
|
||||
}
|
||||
|
@ -4252,6 +4265,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
|
|||
inode, ceph_ino(inode), ceph_snap(inode),
|
||||
session->s_mds);
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
switch (op) {
|
||||
case CEPH_CAP_OP_REVOKE:
|
||||
case CEPH_CAP_OP_GRANT:
|
||||
do_cap_release = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
goto flush_cap_releases;
|
||||
}
|
||||
|
||||
|
@ -4302,6 +4323,18 @@ flush_cap_releases:
|
|||
* along for the mds (who clearly thinks we still have this
|
||||
* cap).
|
||||
*/
|
||||
if (do_cap_release) {
|
||||
cap = ceph_get_cap(mdsc, NULL);
|
||||
cap->cap_ino = vino.ino;
|
||||
cap->queue_release = 1;
|
||||
cap->cap_id = le64_to_cpu(h->cap_id);
|
||||
cap->mseq = mseq;
|
||||
cap->seq = seq;
|
||||
cap->issue_seq = seq;
|
||||
spin_lock(&session->s_cap_lock);
|
||||
__ceph_queue_cap_release(session, cap);
|
||||
spin_unlock(&session->s_cap_lock);
|
||||
}
|
||||
ceph_flush_cap_releases(mdsc, session);
|
||||
goto done;
|
||||
|
||||
|
|
|
@ -886,7 +886,8 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
|
|||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_args.mknod.mode = cpu_to_le32(mode);
|
||||
req->r_args.mknod.rdev = cpu_to_le32(rdev);
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
|
||||
CEPH_CAP_XATTR_EXCL;
|
||||
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
if (as_ctx.pagelist) {
|
||||
req->r_pagelist = as_ctx.pagelist;
|
||||
|
@ -953,7 +954,8 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
|
|||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_dentry = dget(dentry);
|
||||
req->r_num_caps = 2;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
|
||||
CEPH_CAP_XATTR_EXCL;
|
||||
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
if (as_ctx.pagelist) {
|
||||
req->r_pagelist = as_ctx.pagelist;
|
||||
|
@ -1022,7 +1024,8 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
|
|||
ihold(dir);
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_args.mkdir.mode = cpu_to_le32(mode);
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
|
||||
CEPH_CAP_XATTR_EXCL;
|
||||
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
if (as_ctx.pagelist) {
|
||||
req->r_pagelist = as_ctx.pagelist;
|
||||
|
@ -1079,7 +1082,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
|
|||
req->r_parent = dir;
|
||||
ihold(dir);
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
|
||||
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
/* release LINK_SHARED on source inode (mds will lock it) */
|
||||
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
|
||||
|
@ -1218,7 +1221,7 @@ retry:
|
|||
req->r_num_caps = 2;
|
||||
req->r_parent = dir;
|
||||
ihold(dir);
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
|
||||
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
|
||||
|
||||
|
@ -1320,9 +1323,9 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir,
|
|||
req->r_parent = new_dir;
|
||||
ihold(new_dir);
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
|
||||
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
|
||||
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
/* release LINK_RDCACHE on source inode (mds will lock it) */
|
||||
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
|
||||
|
|
|
@ -791,7 +791,8 @@ retry:
|
|||
if (flags & O_CREAT) {
|
||||
struct ceph_file_layout lo;
|
||||
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
|
||||
CEPH_CAP_XATTR_EXCL;
|
||||
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
if (as_ctx.pagelist) {
|
||||
req->r_pagelist = as_ctx.pagelist;
|
||||
|
|
|
@ -645,6 +645,7 @@ bad:
|
|||
err = -EIO;
|
||||
out_bad:
|
||||
pr_err("mds parse_reply err %d\n", err);
|
||||
ceph_msg_dump(msg);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -3538,6 +3539,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
|
|||
|
||||
bad:
|
||||
pr_err("mdsc_handle_forward decode error err=%d\n", err);
|
||||
ceph_msg_dump(msg);
|
||||
}
|
||||
|
||||
static int __decode_session_metadata(void **p, void *end,
|
||||
|
@ -5258,6 +5260,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
|
|||
bad:
|
||||
pr_err("error decoding fsmap %d. Shutting down mount.\n", err);
|
||||
ceph_umount_begin(mdsc->fsc->sb);
|
||||
ceph_msg_dump(msg);
|
||||
err_out:
|
||||
mutex_lock(&mdsc->mutex);
|
||||
mdsc->mdsmap_err = err;
|
||||
|
@ -5326,6 +5329,7 @@ bad_unlock:
|
|||
bad:
|
||||
pr_err("error decoding mdsmap %d. Shutting down mount.\n", err);
|
||||
ceph_umount_begin(mdsc->fsc->sb);
|
||||
ceph_msg_dump(msg);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -36,6 +36,14 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
|
|||
s32 items = 0;
|
||||
s32 len;
|
||||
|
||||
/* Do not send the metrics until the MDS rank is ready */
|
||||
mutex_lock(&mdsc->mutex);
|
||||
if (ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) != CEPH_MDS_STATE_ACTIVE) {
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
return false;
|
||||
}
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
|
||||
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
|
||||
+ sizeof(*meta) + sizeof(*dlease) + sizeof(*files)
|
||||
+ sizeof(*icaps) + sizeof(*inodes) + sizeof(*rsize)
|
||||
|
|
|
@ -675,14 +675,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Fb cap still in use, delay it */
|
||||
if (ci->i_wb_ref) {
|
||||
/*
|
||||
* Defer flushing the capsnap if the dirty buffer not flushed yet.
|
||||
* And trigger to flush the buffer immediately.
|
||||
*/
|
||||
if (ci->i_wrbuffer_ref) {
|
||||
dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu "
|
||||
"used WRBUFFER, delaying\n", __func__, inode,
|
||||
ceph_vinop(inode), capsnap, capsnap->context,
|
||||
capsnap->context->seq, ceph_cap_string(capsnap->dirty),
|
||||
capsnap->size);
|
||||
capsnap->writing = 1;
|
||||
ceph_queue_writeback(inode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -451,6 +451,19 @@ struct ceph_inode_info {
|
|||
unsigned long i_work_mask;
|
||||
};
|
||||
|
||||
struct ceph_netfs_request_data {
|
||||
int caps;
|
||||
|
||||
/*
|
||||
* Maximum size of a file readahead request.
|
||||
* The fadvise could update the bdi's default ra_pages.
|
||||
*/
|
||||
unsigned int file_ra_pages;
|
||||
|
||||
/* Set it if fadvise disables file readahead entirely */
|
||||
bool file_ra_disabled;
|
||||
};
|
||||
|
||||
static inline struct ceph_inode_info *
|
||||
ceph_inode(const struct inode *inode)
|
||||
{
|
||||
|
|
Загрузка…
Ссылка в новой задаче