ceph: check availability of mds cluster on mount

Signed-off-by: Yan, Zheng <zyan@redhat.com>
This commit is contained in:
Yan, Zheng 2016-11-10 16:02:06 +08:00 коммит произвёл Ilya Dryomov
Родитель 7ce469a53e
Коммит e9e427f0a1
5 изменённых файлов: 187 добавлений и 11 удалений

Просмотреть файл

@ -2100,17 +2100,26 @@ static int __do_request(struct ceph_mds_client *mdsc,
err = -EIO;
goto finish;
}
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
if (mdsc->mdsmap_err) {
err = mdsc->mdsmap_err;
dout("do_request mdsmap err %d\n", err);
goto finish;
}
if (!(mdsc->fsc->mount_options->flags &
CEPH_MOUNT_OPT_MOUNTWAIT) &&
!ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
err = -ENOENT;
pr_info("probably no mds server is up\n");
goto finish;
}
}
put_request_session(req);
mds = __choose_mds(mdsc, req);
if (mds < 0 ||
ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
if (mdsc->mdsmap_err) {
err = mdsc->mdsmap_err;
dout("do_request mdsmap err %d\n", err);
goto finish;
}
dout("do_request no mds or not active, waiting for map\n");
list_add(&req->r_wait, &mdsc->waiting_for_map);
goto out;

Просмотреть файл

@ -42,6 +42,60 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
return i;
}
#define __decode_and_drop_type(p, end, type, bad) \
do { \
if (*p + sizeof(type) > end) \
goto bad; \
*p += sizeof(type); \
} while (0)
#define __decode_and_drop_set(p, end, type, bad) \
do { \
u32 n; \
size_t need; \
ceph_decode_32_safe(p, end, n, bad); \
need = sizeof(type) * n; \
ceph_decode_need(p, end, need, bad); \
*p += need; \
} while (0)
#define __decode_and_drop_map(p, end, ktype, vtype, bad) \
do { \
u32 n; \
size_t need; \
ceph_decode_32_safe(p, end, n, bad); \
need = (sizeof(ktype) + sizeof(vtype)) * n; \
ceph_decode_need(p, end, need, bad); \
*p += need; \
} while (0)
static int __decode_and_drop_compat_set(void **p, void* end)
{
int i;
/* compat, ro_compat, incompat*/
for (i = 0; i < 3; i++) {
u32 n;
ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
/* mask */
*p += sizeof(u64);
/* names (map<u64, string>) */
n = ceph_decode_32(p);
while (n-- > 0) {
u32 len;
ceph_decode_need(p, end, sizeof(u64) + sizeof(u32),
bad);
*p += sizeof(u64);
len = ceph_decode_32(p);
ceph_decode_need(p, end, len, bad);
*p += len;
}
}
return 0;
bad:
return -1;
}
/*
* Decode an MDS map
*
@ -55,6 +109,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
int i, j, n;
int err = -EINVAL;
u8 mdsmap_v, mdsmap_cv;
u16 mdsmap_ev;
m = kzalloc(sizeof(*m), GFP_NOFS);
if (m == NULL)
@ -83,7 +138,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
if (m->m_info == NULL)
goto badmem;
goto nomem;
/* pick out active nodes from mds_info (state > 0) */
n = ceph_decode_32(p);
@ -166,7 +221,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
info->export_targets = kcalloc(num_export_targets,
sizeof(u32), GFP_NOFS);
if (info->export_targets == NULL)
goto badmem;
goto nomem;
for (j = 0; j < num_export_targets; j++)
info->export_targets[j] =
ceph_decode_32(&pexport_targets);
@ -180,24 +235,104 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
m->m_num_data_pg_pools = n;
m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
if (!m->m_data_pg_pools)
goto badmem;
goto nomem;
ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
for (i = 0; i < n; i++)
m->m_data_pg_pools[i] = ceph_decode_64(p);
m->m_cas_pg_pool = ceph_decode_64(p);
m->m_enabled = m->m_epoch > 1;
/* ok, we don't care about the rest. */
mdsmap_ev = 1;
if (mdsmap_v >= 2) {
ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext);
}
if (mdsmap_ev >= 3) {
if (__decode_and_drop_compat_set(p, end) < 0)
goto bad_ext;
}
/* metadata_pool */
if (mdsmap_ev < 5) {
__decode_and_drop_type(p, end, u32, bad_ext);
} else {
__decode_and_drop_type(p, end, u64, bad_ext);
}
/* created + modified + tableserver */
__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
__decode_and_drop_type(p, end, u32, bad_ext);
/* in */
{
int num_laggy = 0;
ceph_decode_32_safe(p, end, n, bad_ext);
ceph_decode_need(p, end, sizeof(u32) * n, bad_ext);
for (i = 0; i < n; i++) {
s32 mds = ceph_decode_32(p);
if (mds >= 0 && mds < m->m_max_mds) {
if (m->m_info[mds].laggy)
num_laggy++;
}
}
m->m_num_laggy = num_laggy;
}
/* inc */
__decode_and_drop_map(p, end, u32, u32, bad_ext);
/* up */
__decode_and_drop_map(p, end, u32, u64, bad_ext);
/* failed */
__decode_and_drop_set(p, end, u32, bad_ext);
/* stopped */
__decode_and_drop_set(p, end, u32, bad_ext);
if (mdsmap_ev >= 4) {
/* last_failure_osd_epoch */
__decode_and_drop_type(p, end, u32, bad_ext);
}
if (mdsmap_ev >= 6) {
/* ever_allowed_snaps */
__decode_and_drop_type(p, end, u8, bad_ext);
/* explicitly_allowed_snaps */
__decode_and_drop_type(p, end, u8, bad_ext);
}
if (mdsmap_ev >= 7) {
/* inline_data_enabled */
__decode_and_drop_type(p, end, u8, bad_ext);
}
if (mdsmap_ev >= 8) {
u32 name_len;
/* enabled */
ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
ceph_decode_32_safe(p, end, name_len, bad_ext);
ceph_decode_need(p, end, name_len, bad_ext);
*p += name_len;
}
/* damaged */
if (mdsmap_ev >= 9) {
size_t need;
ceph_decode_32_safe(p, end, n, bad_ext);
need = sizeof(u32) * n;
ceph_decode_need(p, end, need, bad_ext);
*p += need;
m->m_damaged = n > 0;
} else {
m->m_damaged = false;
}
bad_ext:
*p = end;
dout("mdsmap_decode success epoch %u\n", m->m_epoch);
return m;
badmem:
nomem:
err = -ENOMEM;
goto out_err;
bad:
pr_err("corrupt mdsmap\n");
print_hex_dump(KERN_DEBUG, "mdsmap: ",
DUMP_PREFIX_OFFSET, 16, 1,
start, end - start, true);
out_err:
ceph_mdsmap_destroy(m);
return ERR_PTR(err);
}
@ -212,3 +347,19 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
kfree(m->m_data_pg_pools);
kfree(m);
}
bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
{
int i, nr_active = 0;
if (!m->m_enabled)
return false;
if (m->m_damaged)
return false;
if (m->m_num_laggy > 0)
return false;
for (i = 0; i < m->m_max_mds; i++) {
if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
nr_active++;
}
return nr_active > 0;
}

Просмотреть файл

@ -137,6 +137,8 @@ enum {
Opt_nofscache,
Opt_poolperm,
Opt_nopoolperm,
Opt_require_active_mds,
Opt_norequire_active_mds,
#ifdef CONFIG_CEPH_FS_POSIX_ACL
Opt_acl,
#endif
@ -171,6 +173,8 @@ static match_table_t fsopt_tokens = {
{Opt_nofscache, "nofsc"},
{Opt_poolperm, "poolperm"},
{Opt_nopoolperm, "nopoolperm"},
{Opt_require_active_mds, "require_active_mds"},
{Opt_norequire_active_mds, "norequire_active_mds"},
#ifdef CONFIG_CEPH_FS_POSIX_ACL
{Opt_acl, "acl"},
#endif
@ -287,6 +291,12 @@ static int parse_fsopt_token(char *c, void *private)
case Opt_nopoolperm:
fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
break;
case Opt_require_active_mds:
fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
break;
case Opt_norequire_active_mds:
fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
break;
#ifdef CONFIG_CEPH_FS_POSIX_ACL
case Opt_acl:
fsopt->sb_flags |= MS_POSIXACL;

Просмотреть файл

@ -36,6 +36,7 @@
#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
#define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */
#define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */
#define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE

Просмотреть файл

@ -31,6 +31,10 @@ struct ceph_mdsmap {
int m_num_data_pg_pools;
u64 *m_data_pg_pools;
u64 m_cas_pg_pool;
bool m_enabled;
bool m_damaged;
int m_num_laggy;
};
static inline struct ceph_entity_addr *
@ -59,5 +63,6 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);
#endif