ceph: check availability of mds cluster on mount
Signed-off-by: Yan, Zheng <zyan@redhat.com>
This commit is contained in:
Родитель
7ce469a53e
Коммит
e9e427f0a1
|
@ -2100,17 +2100,26 @@ static int __do_request(struct ceph_mds_client *mdsc,
|
|||
err = -EIO;
|
||||
goto finish;
|
||||
}
|
||||
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
|
||||
if (mdsc->mdsmap_err) {
|
||||
err = mdsc->mdsmap_err;
|
||||
dout("do_request mdsmap err %d\n", err);
|
||||
goto finish;
|
||||
}
|
||||
if (!(mdsc->fsc->mount_options->flags &
|
||||
CEPH_MOUNT_OPT_MOUNTWAIT) &&
|
||||
!ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
|
||||
err = -ENOENT;
|
||||
pr_info("probably no mds server is up\n");
|
||||
goto finish;
|
||||
}
|
||||
}
|
||||
|
||||
put_request_session(req);
|
||||
|
||||
mds = __choose_mds(mdsc, req);
|
||||
if (mds < 0 ||
|
||||
ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
|
||||
if (mdsc->mdsmap_err) {
|
||||
err = mdsc->mdsmap_err;
|
||||
dout("do_request mdsmap err %d\n", err);
|
||||
goto finish;
|
||||
}
|
||||
dout("do_request no mds or not active, waiting for map\n");
|
||||
list_add(&req->r_wait, &mdsc->waiting_for_map);
|
||||
goto out;
|
||||
|
|
163
fs/ceph/mdsmap.c
163
fs/ceph/mdsmap.c
|
@ -42,6 +42,60 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
|
|||
return i;
|
||||
}
|
||||
|
||||
/*
 * Skip one fixed-size value of @type in the buffer without decoding it.
 *
 * @p:    pointer to the read cursor (a void **); evaluated more than once,
 *        so it must not have side effects
 * @end:  one past the last valid byte of the buffer
 * @type: type whose size is skipped
 * @bad:  label jumped to, with the cursor left untouched, when fewer than
 *        sizeof(@type) bytes remain before @end
 *
 * @p and @end are parenthesized so that expression arguments expand with
 * the intended precedence.
 */
#define __decode_and_drop_type(p, end, type, bad)		\
	do {							\
		if (*(p) + sizeof(type) > (end))		\
			goto bad;				\
		*(p) += sizeof(type);				\
	} while (0)
|
||||
|
||||
/*
 * Skip an encoded set of fixed-size elements without decoding them.
 *
 * Reads a 32-bit element count with ceph_decode_32_safe(), then advances
 * the cursor by count * sizeof(@type) bytes once ceph_decode_need() has
 * accepted that length.
 *
 * @p:    pointer to the read cursor (a void **); evaluated more than once
 * @end:  end of the buffer, passed through to the ceph_decode_* helpers
 * @type: element type of the set
 * @bad:  label used as the failure target
 *
 * The count comes from the wire, so the byte length is checked for
 * wrap-around (possible where size_t is 32 bits wide) before it is used;
 * a wrapped length jumps to @bad instead of skipping the wrong number of
 * bytes.  The temporaries carry a "__" prefix so they neither shadow nor
 * capture variables named "n" or "need" at the call site.
 */
#define __decode_and_drop_set(p, end, type, bad)		\
	do {							\
		u32 __n;					\
		size_t __need;					\
		ceph_decode_32_safe(p, end, __n, bad);		\
		__need = sizeof(type) * __n;			\
		if (__need / sizeof(type) != __n)		\
			goto bad;				\
		ceph_decode_need(p, end, __need, bad);		\
		*(p) += __need;					\
	} while (0)
|
||||
|
||||
/*
 * Skip an encoded map with fixed-size keys and values without decoding it.
 *
 * Reads a 32-bit entry count with ceph_decode_32_safe(), then advances the
 * cursor by count * (sizeof(@ktype) + sizeof(@vtype)) bytes once
 * ceph_decode_need() has accepted that length.
 *
 * @p:     pointer to the read cursor (a void **); evaluated more than once
 * @end:   end of the buffer, passed through to the ceph_decode_* helpers
 * @ktype: key type
 * @vtype: value type
 * @bad:   label used as the failure target
 *
 * The count comes from the wire, so the byte length is checked for
 * wrap-around (possible where size_t is 32 bits wide) before it is used;
 * a wrapped length jumps to @bad instead of skipping the wrong number of
 * bytes.  The temporaries carry a "__" prefix so they neither shadow nor
 * capture variables named "n" or "need" at the call site.
 */
#define __decode_and_drop_map(p, end, ktype, vtype, bad)	\
	do {							\
		u32 __n;					\
		size_t __entry = sizeof(ktype) + sizeof(vtype);	\
		size_t __need;					\
		ceph_decode_32_safe(p, end, __n, bad);		\
		__need = __entry * __n;				\
		if (__need / __entry != __n)			\
			goto bad;				\
		ceph_decode_need(p, end, __need, bad);		\
		*(p) += __need;					\
	} while (0)
|
||||
|
||||
|
||||
static int __decode_and_drop_compat_set(void **p, void* end)
|
||||
{
|
||||
int i;
|
||||
/* compat, ro_compat, incompat*/
|
||||
for (i = 0; i < 3; i++) {
|
||||
u32 n;
|
||||
ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
|
||||
/* mask */
|
||||
*p += sizeof(u64);
|
||||
/* names (map<u64, string>) */
|
||||
n = ceph_decode_32(p);
|
||||
while (n-- > 0) {
|
||||
u32 len;
|
||||
ceph_decode_need(p, end, sizeof(u64) + sizeof(u32),
|
||||
bad);
|
||||
*p += sizeof(u64);
|
||||
len = ceph_decode_32(p);
|
||||
ceph_decode_need(p, end, len, bad);
|
||||
*p += len;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
bad:
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode an MDS map
|
||||
*
|
||||
|
@ -55,6 +109,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
|
|||
int i, j, n;
|
||||
int err = -EINVAL;
|
||||
u8 mdsmap_v, mdsmap_cv;
|
||||
u16 mdsmap_ev;
|
||||
|
||||
m = kzalloc(sizeof(*m), GFP_NOFS);
|
||||
if (m == NULL)
|
||||
|
@ -83,7 +138,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
|
|||
|
||||
m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
|
||||
if (m->m_info == NULL)
|
||||
goto badmem;
|
||||
goto nomem;
|
||||
|
||||
/* pick out active nodes from mds_info (state > 0) */
|
||||
n = ceph_decode_32(p);
|
||||
|
@ -166,7 +221,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
|
|||
info->export_targets = kcalloc(num_export_targets,
|
||||
sizeof(u32), GFP_NOFS);
|
||||
if (info->export_targets == NULL)
|
||||
goto badmem;
|
||||
goto nomem;
|
||||
for (j = 0; j < num_export_targets; j++)
|
||||
info->export_targets[j] =
|
||||
ceph_decode_32(&pexport_targets);
|
||||
|
@ -180,24 +235,104 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
|
|||
m->m_num_data_pg_pools = n;
|
||||
m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
|
||||
if (!m->m_data_pg_pools)
|
||||
goto badmem;
|
||||
goto nomem;
|
||||
ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
|
||||
for (i = 0; i < n; i++)
|
||||
m->m_data_pg_pools[i] = ceph_decode_64(p);
|
||||
m->m_cas_pg_pool = ceph_decode_64(p);
|
||||
m->m_enabled = m->m_epoch > 1;
|
||||
|
||||
/* ok, we don't care about the rest. */
|
||||
mdsmap_ev = 1;
|
||||
if (mdsmap_v >= 2) {
|
||||
ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext);
|
||||
}
|
||||
if (mdsmap_ev >= 3) {
|
||||
if (__decode_and_drop_compat_set(p, end) < 0)
|
||||
goto bad_ext;
|
||||
}
|
||||
/* metadata_pool */
|
||||
if (mdsmap_ev < 5) {
|
||||
__decode_and_drop_type(p, end, u32, bad_ext);
|
||||
} else {
|
||||
__decode_and_drop_type(p, end, u64, bad_ext);
|
||||
}
|
||||
|
||||
/* created + modified + tableserver */
|
||||
__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
|
||||
__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
|
||||
__decode_and_drop_type(p, end, u32, bad_ext);
|
||||
|
||||
/* in */
|
||||
{
|
||||
int num_laggy = 0;
|
||||
ceph_decode_32_safe(p, end, n, bad_ext);
|
||||
ceph_decode_need(p, end, sizeof(u32) * n, bad_ext);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
s32 mds = ceph_decode_32(p);
|
||||
if (mds >= 0 && mds < m->m_max_mds) {
|
||||
if (m->m_info[mds].laggy)
|
||||
num_laggy++;
|
||||
}
|
||||
}
|
||||
m->m_num_laggy = num_laggy;
|
||||
}
|
||||
|
||||
/* inc */
|
||||
__decode_and_drop_map(p, end, u32, u32, bad_ext);
|
||||
/* up */
|
||||
__decode_and_drop_map(p, end, u32, u64, bad_ext);
|
||||
/* failed */
|
||||
__decode_and_drop_set(p, end, u32, bad_ext);
|
||||
/* stopped */
|
||||
__decode_and_drop_set(p, end, u32, bad_ext);
|
||||
|
||||
if (mdsmap_ev >= 4) {
|
||||
/* last_failure_osd_epoch */
|
||||
__decode_and_drop_type(p, end, u32, bad_ext);
|
||||
}
|
||||
if (mdsmap_ev >= 6) {
|
||||
/* ever_allowed_snaps */
|
||||
__decode_and_drop_type(p, end, u8, bad_ext);
|
||||
/* explicitly_allowed_snaps */
|
||||
__decode_and_drop_type(p, end, u8, bad_ext);
|
||||
}
|
||||
if (mdsmap_ev >= 7) {
|
||||
/* inline_data_enabled */
|
||||
__decode_and_drop_type(p, end, u8, bad_ext);
|
||||
}
|
||||
if (mdsmap_ev >= 8) {
|
||||
u32 name_len;
|
||||
/* enabled */
|
||||
ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
|
||||
ceph_decode_32_safe(p, end, name_len, bad_ext);
|
||||
ceph_decode_need(p, end, name_len, bad_ext);
|
||||
*p += name_len;
|
||||
}
|
||||
/* damaged */
|
||||
if (mdsmap_ev >= 9) {
|
||||
size_t need;
|
||||
ceph_decode_32_safe(p, end, n, bad_ext);
|
||||
need = sizeof(u32) * n;
|
||||
ceph_decode_need(p, end, need, bad_ext);
|
||||
*p += need;
|
||||
m->m_damaged = n > 0;
|
||||
} else {
|
||||
m->m_damaged = false;
|
||||
}
|
||||
bad_ext:
|
||||
*p = end;
|
||||
dout("mdsmap_decode success epoch %u\n", m->m_epoch);
|
||||
return m;
|
||||
|
||||
badmem:
|
||||
nomem:
|
||||
err = -ENOMEM;
|
||||
goto out_err;
|
||||
bad:
|
||||
pr_err("corrupt mdsmap\n");
|
||||
print_hex_dump(KERN_DEBUG, "mdsmap: ",
|
||||
DUMP_PREFIX_OFFSET, 16, 1,
|
||||
start, end - start, true);
|
||||
out_err:
|
||||
ceph_mdsmap_destroy(m);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
@ -212,3 +347,19 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
|
|||
kfree(m->m_data_pg_pools);
|
||||
kfree(m);
|
||||
}
|
||||
|
||||
bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
|
||||
{
|
||||
int i, nr_active = 0;
|
||||
if (!m->m_enabled)
|
||||
return false;
|
||||
if (m->m_damaged)
|
||||
return false;
|
||||
if (m->m_num_laggy > 0)
|
||||
return false;
|
||||
for (i = 0; i < m->m_max_mds; i++) {
|
||||
if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
|
||||
nr_active++;
|
||||
}
|
||||
return nr_active > 0;
|
||||
}
|
||||
|
|
|
@ -137,6 +137,8 @@ enum {
|
|||
Opt_nofscache,
|
||||
Opt_poolperm,
|
||||
Opt_nopoolperm,
|
||||
Opt_require_active_mds,
|
||||
Opt_norequire_active_mds,
|
||||
#ifdef CONFIG_CEPH_FS_POSIX_ACL
|
||||
Opt_acl,
|
||||
#endif
|
||||
|
@ -171,6 +173,8 @@ static match_table_t fsopt_tokens = {
|
|||
{Opt_nofscache, "nofsc"},
|
||||
{Opt_poolperm, "poolperm"},
|
||||
{Opt_nopoolperm, "nopoolperm"},
|
||||
{Opt_require_active_mds, "require_active_mds"},
|
||||
{Opt_norequire_active_mds, "norequire_active_mds"},
|
||||
#ifdef CONFIG_CEPH_FS_POSIX_ACL
|
||||
{Opt_acl, "acl"},
|
||||
#endif
|
||||
|
@ -287,6 +291,12 @@ static int parse_fsopt_token(char *c, void *private)
|
|||
case Opt_nopoolperm:
|
||||
fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
|
||||
break;
|
||||
case Opt_require_active_mds:
|
||||
fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
|
||||
break;
|
||||
case Opt_norequire_active_mds:
|
||||
fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
|
||||
break;
|
||||
#ifdef CONFIG_CEPH_FS_POSIX_ACL
|
||||
case Opt_acl:
|
||||
fsopt->sb_flags |= MS_POSIXACL;
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
|
||||
#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
|
||||
#define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */
|
||||
#define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */
|
||||
|
||||
#define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE
|
||||
|
||||
|
|
|
@ -31,6 +31,10 @@ struct ceph_mdsmap {
|
|||
int m_num_data_pg_pools;
|
||||
u64 *m_data_pg_pools;
|
||||
u64 m_cas_pg_pool;
|
||||
|
||||
bool m_enabled;
|
||||
bool m_damaged;
|
||||
int m_num_laggy;
|
||||
};
|
||||
|
||||
static inline struct ceph_entity_addr *
|
||||
|
@ -59,5 +63,6 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
|
|||
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
|
||||
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
|
||||
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
|
||||
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);
|
||||
|
||||
#endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче