The big ticket item here is support for rbd exclusive-lock feature,

with maintenance operations offloaded to userspace (Douglas Fuller,
 Mike Christie and myself).  Another block device bullet is a series
 fixing up layering error paths (myself).
 
 On the filesystem side, we've got patches that improve our handling of
 buffered vs dio write races (Neil Brown) and a few assorted fixes from
 Zheng.  Also included a couple of random cleanups and a minor CRUSH
 update.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQEcBAABCAAGBQJX+PjZAAoJEEp/3jgCEfOLVuoH/RwtFLIb6/KZUYtBOrVVrTun
 kReRlfq2xKYrGGtyQEqSuz7fBdwT1LVCVcL8kC4GFD4R67o+tNMAr6PfM/7pZABj
 HRoRLgSZ9FLw4W5n0VpBIznih75QUbCdXiTCtH9eorMHU5q1YpTvVHHlF9W9Pm2I
 eNGnBWpGyHVeiK66mpUCH+EQKQ4GkAVD9rneTNqLHgq2yotHkVl1j258+DL6JRGs
 OBoh3RmNQaGOAS37Lss8erCSusAGEcAeGV6ubuK2lFUKyR41EkD3I0xkhNSPe+CD
 RifFcpVziIeTu//cLgl0nnHGtmUytD7HgJubaPthArKIOen9ZDAfEkgI0o+JI2A=
 =45O7
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-4.9-rc1' of git://github.com/ceph/ceph-client

Pull Ceph updates from Ilya Dryomov:
 "The big ticket item here is support for rbd exclusive-lock feature,
  with maintenance operations offloaded to userspace (Douglas Fuller,
  Mike Christie and myself). Another block device bullet is a series
  fixing up layering error paths (myself).

  On the filesystem side, we've got patches that improve our handling of
  buffered vs dio write races (Neil Brown) and a few assorted fixes from
  Zheng. Also included a couple of random cleanups and a minor CRUSH
  update"

* tag 'ceph-for-4.9-rc1' of git://github.com/ceph/ceph-client: (39 commits)
  crush: remove redundant local variable
  crush: don't normalize input of crush_ln iteratively
  libceph: ceph_build_auth() doesn't need ceph_auth_build_hello()
  libceph: use CEPH_AUTH_UNKNOWN in ceph_auth_build_hello()
  ceph: fix description for rsize and rasize mount options
  rbd: use kmalloc_array() in rbd_header_from_disk()
  ceph: use list_move instead of list_del/list_add
  ceph: handle CEPH_SESSION_REJECT message
  ceph: avoid accessing / when mounting a subpath
  ceph: fix mandatory flock check
  ceph: remove warning when ceph_releasepage() is called on dirty page
  ceph: ignore error from invalidate_inode_pages2_range() in direct write
  ceph: fix error handling of start_read()
  rbd: add rbd_obj_request_error() helper
  rbd: img_data requests don't own their page array
  rbd: don't call rbd_osd_req_format_read() for !img_data requests
  rbd: rework rbd_img_obj_exists_submit() error paths
  rbd: don't crash or leak on errors in rbd_img_obj_parent_read_full_callback()
  rbd: move bumping img_request refcount into rbd_obj_request_submit()
  rbd: mark the original request as done if stat request fails
  ...
This commit is contained in:
Linus Torvalds 2016-10-10 13:52:05 -07:00
Родитель fed41f7d03 64f77566e1
Коммит 8dfb790b15
26 изменённых файлов: 1971 добавлений и 344 удалений

Просмотреть файл

@ -6,7 +6,7 @@ Description:
Being used for adding and removing rbd block devices.
Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name]
Usage: <mon ip addr> <options> <pool name> <rbd image name> [<snap name>]
$ echo "192.168.0.1 name=admin rbd foo" > /sys/bus/rbd/add
@ -14,9 +14,13 @@ The snapshot name can be "-" or omitted to map the image read/write. A <dev-id>
will be assigned for any registered block device. If snapshot is used, it will
be mapped read-only.
Removal of a device:
Usage: <dev-id> [force]
$ echo <dev-id> > /sys/bus/rbd/remove
$ echo 2 > /sys/bus/rbd/remove
Optional "force" argument which when passed will wait for running requests and
then unmap the image. Requests sent to the driver after initiating the removal
will be failed. (August 2016, since 4.9.)
What: /sys/bus/rbd/add_single_major
Date: December 2013
@ -43,10 +47,25 @@ Description: Available only if rbd module is inserted with single_major
Entries under /sys/bus/rbd/devices/<dev-id>/
--------------------------------------------
client_addr
The ceph unique client entity_addr_t (address + nonce).
The format is <address>:<port>/<nonce>: '1.2.3.4:1234/5678' or
'[1:2:3:4:5:6:7:8]:1234/5678'. (August 2016, since 4.9.)
client_id
The ceph unique client id that was assigned for this specific session.
cluster_fsid
The ceph cluster UUID. (August 2016, since 4.9.)
config_info
The string written into /sys/bus/rbd/add{,_single_major}. (August
2016, since 4.9.)
features
A hexadecimal encoding of the feature bits for this image.
@ -92,6 +111,10 @@ current_snap
The current snapshot for which the device is mapped.
snap_id
The current snapshot's id. (August 2016, since 4.9.)
parent
Information identifying the chain of parent images in a layered rbd

Просмотреть файл

@ -98,6 +98,10 @@ Mount Options
size.
rsize=X
Specify the maximum read size in bytes. By default there is no
maximum.
rasize=X
Specify the maximum readahead.
mount_timeout=X

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -28,6 +28,17 @@
#define RBD_DATA_PREFIX "rbd_data."
#define RBD_ID_PREFIX "rbd_id."
#define RBD_LOCK_NAME "rbd_lock"
#define RBD_LOCK_TAG "internal"
#define RBD_LOCK_COOKIE_PREFIX "auto"
enum rbd_notify_op {
RBD_NOTIFY_OP_ACQUIRED_LOCK = 0,
RBD_NOTIFY_OP_RELEASED_LOCK = 1,
RBD_NOTIFY_OP_REQUEST_LOCK = 2,
RBD_NOTIFY_OP_HEADER_UPDATE = 3,
};
/*
* For format version 1, rbd image 'foo' consists of objects
* foo.rbd - image metadata

Просмотреть файл

@ -175,9 +175,8 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
static int ceph_releasepage(struct page *page, gfp_t g)
{
dout("%p releasepage %p idx %lu\n", page->mapping->host,
page, page->index);
WARN_ON(PageDirty(page));
dout("%p releasepage %p idx %lu (%sdirty)\n", page->mapping->host,
page, page->index, PageDirty(page) ? "" : "not ");
/* Can we release the page from the cache? */
if (!ceph_release_fscache_page(page, g))
@ -298,14 +297,6 @@ unlock:
kfree(osd_data->pages);
}
static void ceph_unlock_page_vector(struct page **pages, int num_pages)
{
int i;
for (i = 0; i < num_pages; i++)
unlock_page(pages[i]);
}
/*
* start an async read(ahead) operation. return nr_pages we submitted
* a read for on success, or negative error code.
@ -370,6 +361,10 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
dout("start_read %p add_to_page_cache failed %p\n",
inode, page);
nr_pages = i;
if (nr_pages > 0) {
len = nr_pages << PAGE_SHIFT;
break;
}
goto out_pages;
}
pages[i] = page;
@ -386,8 +381,11 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
return nr_pages;
out_pages:
ceph_unlock_page_vector(pages, nr_pages);
ceph_release_page_vector(pages, nr_pages);
for (i = 0; i < nr_pages; ++i) {
ceph_fscache_readpage_cancel(inode, pages[i]);
unlock_page(pages[i]);
}
ceph_put_page_vector(pages, nr_pages, false);
out:
ceph_osdc_put_request(req);
return ret;

Просмотреть файл

@ -902,10 +902,10 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
return ret;
if (write) {
ret = invalidate_inode_pages2_range(inode->i_mapping,
int ret2 = invalidate_inode_pages2_range(inode->i_mapping,
pos >> PAGE_SHIFT,
(pos + count) >> PAGE_SHIFT);
if (ret < 0)
if (ret2 < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret);
flags = CEPH_OSD_FLAG_ORDERSNAP |

Просмотреть файл

@ -210,8 +210,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
/* No mandatory locks */
if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
return -ENOLCK;
if (fl->fl_type & LOCK_MAND)
return -EOPNOTSUPP;
dout("ceph_flock, fl_file: %p", fl->fl_file);

Просмотреть файл

@ -370,6 +370,7 @@ const char *ceph_session_state_name(int s)
case CEPH_MDS_SESSION_CLOSING: return "closing";
case CEPH_MDS_SESSION_RESTARTING: return "restarting";
case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
case CEPH_MDS_SESSION_REJECTED: return "rejected";
default: return "???";
}
}
@ -1150,8 +1151,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
while (!list_empty(&ci->i_cap_flush_list)) {
cf = list_first_entry(&ci->i_cap_flush_list,
struct ceph_cap_flush, i_list);
list_del(&cf->i_list);
list_add(&cf->i_list, &to_remove);
list_move(&cf->i_list, &to_remove);
}
spin_lock(&mdsc->cap_dirty_lock);
@ -1378,7 +1378,7 @@ static int request_close_session(struct ceph_mds_client *mdsc,
if (!msg)
return -ENOMEM;
ceph_con_send(&session->s_con, msg);
return 0;
return 1;
}
/*
@ -2131,6 +2131,10 @@ static int __do_request(struct ceph_mds_client *mdsc,
ceph_session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
session->s_state != CEPH_MDS_SESSION_HUNG) {
if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
err = -EACCES;
goto out_session;
}
if (session->s_state == CEPH_MDS_SESSION_NEW ||
session->s_state == CEPH_MDS_SESSION_CLOSING)
__open_session(mdsc, session);
@ -2652,6 +2656,15 @@ static void handle_session(struct ceph_mds_session *session,
wake_up_session_caps(session, 0);
break;
case CEPH_SESSION_REJECT:
WARN_ON(session->s_state != CEPH_MDS_SESSION_OPENING);
pr_info("mds%d rejected session\n", session->s_mds);
session->s_state = CEPH_MDS_SESSION_REJECTED;
cleanup_session_requests(mdsc, session);
remove_session_caps(session);
wake = 2; /* for good measure */
break;
default:
pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds);
WARN_ON(1);
@ -3557,11 +3570,11 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
/*
* true if all sessions are closed, or we force unmount
*/
static bool done_closing_sessions(struct ceph_mds_client *mdsc)
static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
{
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return true;
return atomic_read(&mdsc->num_sessions) == 0;
return atomic_read(&mdsc->num_sessions) <= skipped;
}
/*
@ -3572,6 +3585,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
struct ceph_options *opts = mdsc->fsc->client->options;
struct ceph_mds_session *session;
int i;
int skipped = 0;
dout("close_sessions\n");
@ -3583,7 +3597,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
continue;
mutex_unlock(&mdsc->mutex);
mutex_lock(&session->s_mutex);
__close_session(mdsc, session);
if (__close_session(mdsc, session) <= 0)
skipped++;
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
mutex_lock(&mdsc->mutex);
@ -3591,7 +3606,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
mutex_unlock(&mdsc->mutex);
dout("waiting for sessions to close\n");
wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
wait_event_timeout(mdsc->session_close_wq,
done_closing_sessions(mdsc, skipped),
ceph_timeout_jiffies(opts->mount_timeout));
/* tear down remaining sessions */

Просмотреть файл

@ -121,6 +121,7 @@ enum {
CEPH_MDS_SESSION_CLOSING = 5,
CEPH_MDS_SESSION_RESTARTING = 6,
CEPH_MDS_SESSION_RECONNECTING = 7,
CEPH_MDS_SESSION_REJECTED = 8,
};
struct ceph_mds_session {

Просмотреть файл

@ -43,6 +43,8 @@ const char *ceph_session_op_name(int op)
case CEPH_SESSION_RECALL_STATE: return "recall_state";
case CEPH_SESSION_FLUSHMSG: return "flushmsg";
case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack";
case CEPH_SESSION_FORCE_RO: return "force_ro";
case CEPH_SESSION_REJECT: return "reject";
}
return "???";
}

Просмотреть файл

@ -396,10 +396,12 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
*/
dev_name_end = strchr(dev_name, '/');
if (dev_name_end) {
fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
if (!fsopt->server_path) {
err = -ENOMEM;
goto out;
if (strlen(dev_name_end) > 1) {
fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
if (!fsopt->server_path) {
err = -ENOMEM;
goto out;
}
}
} else {
dev_name_end = dev_name + strlen(dev_name);
@ -788,15 +790,10 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
struct inode *inode = req->r_target_inode;
req->r_target_inode = NULL;
dout("open_root_inode success\n");
if (ceph_ino(inode) == CEPH_INO_ROOT &&
fsc->sb->s_root == NULL) {
root = d_make_root(inode);
if (!root) {
root = ERR_PTR(-ENOMEM);
goto out;
}
} else {
root = d_obtain_root(inode);
root = d_make_root(inode);
if (!root) {
root = ERR_PTR(-ENOMEM);
goto out;
}
ceph_init_dentry(root);
dout("open_root_inode success, root dentry is %p\n", root);
@ -825,17 +822,24 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
mutex_lock(&fsc->client->mount_mutex);
if (!fsc->sb->s_root) {
const char *path;
err = __ceph_open_session(fsc->client, started);
if (err < 0)
goto out;
dout("mount opening root\n");
root = open_root_dentry(fsc, "", started);
if (!fsc->mount_options->server_path) {
path = "";
dout("mount opening path \\t\n");
} else {
path = fsc->mount_options->server_path + 1;
dout("mount opening path %s\n", path);
}
root = open_root_dentry(fsc, path, started);
if (IS_ERR(root)) {
err = PTR_ERR(root);
goto out;
}
fsc->sb->s_root = root;
fsc->sb->s_root = dget(root);
first = 1;
err = ceph_fs_debugfs_init(fsc);
@ -843,19 +847,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
goto fail;
}
if (!fsc->mount_options->server_path) {
root = fsc->sb->s_root;
dget(root);
} else {
const char *path = fsc->mount_options->server_path + 1;
dout("mount opening path %s\n", path);
root = open_root_dentry(fsc, path, started);
if (IS_ERR(root)) {
err = PTR_ERR(root);
goto fail;
}
}
fsc->mount_state = CEPH_MOUNT_MOUNTED;
dout("mount success\n");
mutex_unlock(&fsc->client->mount_mutex);

Просмотреть файл

@ -104,7 +104,7 @@ extern int ceph_auth_build_hello(struct ceph_auth_client *ac,
extern int ceph_handle_auth_reply(struct ceph_auth_client *ac,
void *buf, size_t len,
void *reply_buf, size_t reply_len);
extern int ceph_entity_name_encode(const char *name, void **p, void *end);
int ceph_auth_entity_name_encode(const char *name, void **p, void *end);
extern int ceph_build_auth(struct ceph_auth_client *ac,
void *msg_buf, size_t msg_len);

Просмотреть файл

@ -138,6 +138,9 @@ struct ceph_dir_layout {
#define CEPH_MSG_POOLOP_REPLY 48
#define CEPH_MSG_POOLOP 49
/* mon commands */
#define CEPH_MSG_MON_COMMAND 50
#define CEPH_MSG_MON_COMMAND_ACK 51
/* osd */
#define CEPH_MSG_OSD_MAP 41
@ -176,6 +179,14 @@ struct ceph_mon_statfs_reply {
struct ceph_statfs st;
} __attribute__ ((packed));
struct ceph_mon_command {
struct ceph_mon_request_header monhdr;
struct ceph_fsid fsid;
__le32 num_strs; /* always 1 */
__le32 str_len;
char str[];
} __attribute__ ((packed));
struct ceph_osd_getmap {
struct ceph_mon_request_header monhdr;
struct ceph_fsid fsid;
@ -270,6 +281,7 @@ enum {
CEPH_SESSION_FLUSHMSG,
CEPH_SESSION_FLUSHMSG_ACK,
CEPH_SESSION_FORCE_RO,
CEPH_SESSION_REJECT,
};
extern const char *ceph_session_op_name(int op);

Просмотреть файл

@ -0,0 +1,49 @@
#ifndef _LINUX_CEPH_CLS_LOCK_CLIENT_H
#define _LINUX_CEPH_CLS_LOCK_CLIENT_H
#include <linux/ceph/osd_client.h>
enum ceph_cls_lock_type {
CEPH_CLS_LOCK_NONE = 0,
CEPH_CLS_LOCK_EXCLUSIVE = 1,
CEPH_CLS_LOCK_SHARED = 2,
};
struct ceph_locker_id {
struct ceph_entity_name name; /* locker's client name */
char *cookie; /* locker's cookie */
};
struct ceph_locker_info {
struct ceph_entity_addr addr; /* locker's address */
};
struct ceph_locker {
struct ceph_locker_id id;
struct ceph_locker_info info;
};
int ceph_cls_lock(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, u8 type, char *cookie,
char *tag, char *desc, u8 flags);
int ceph_cls_unlock(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, char *cookie);
int ceph_cls_break_lock(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, char *cookie,
struct ceph_entity_name *locker);
void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers);
int ceph_cls_lock_info(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, u8 *type, char **tag,
struct ceph_locker **lockers, u32 *num_lockers);
#endif

Просмотреть файл

@ -264,7 +264,8 @@ extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
void *private,
u64 supported_features,
u64 required_features);
extern u64 ceph_client_id(struct ceph_client *client);
struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
u64 ceph_client_gid(struct ceph_client *client);
extern void ceph_destroy_client(struct ceph_client *client);
extern int __ceph_open_session(struct ceph_client *client,
unsigned long started);

Просмотреть файл

@ -141,6 +141,9 @@ int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what,
int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what,
ceph_monc_callback_t cb, u64 private_data);
int ceph_monc_blacklist_add(struct ceph_mon_client *monc,
struct ceph_entity_addr *client_addr);
extern int ceph_monc_open_session(struct ceph_mon_client *monc);
extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);

Просмотреть файл

@ -120,6 +120,9 @@ struct ceph_osd_req_op {
struct ceph_osd_data request_data;
struct ceph_osd_data response_data;
} notify;
struct {
struct ceph_osd_data response_data;
} list_watchers;
struct {
u64 expected_object_size;
u64 expected_write_size;
@ -249,6 +252,12 @@ struct ceph_osd_linger_request {
size_t *preply_len;
};
struct ceph_watch_item {
struct ceph_entity_name name;
u64 cookie;
struct ceph_entity_addr addr;
};
struct ceph_osd_client {
struct ceph_client *client;
@ -346,7 +355,6 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
struct page **pages, u64 length,
u32 alignment, bool pages_from_pool,
bool own_pages);
extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode,
const char *class, const char *method);
@ -389,6 +397,14 @@ extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
int ceph_osdc_call(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
const char *class, const char *method,
unsigned int flags,
struct page *req_page, size_t req_len,
struct page *resp_page, size_t *resp_len);
extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
struct ceph_vino vino,
struct ceph_file_layout *layout,
@ -434,5 +450,10 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
size_t *preply_len);
int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
struct ceph_osd_linger_request *lreq);
int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
struct ceph_watch_item **watchers,
u32 *num_watchers);
#endif

Просмотреть файл

@ -5,6 +5,7 @@ obj-$(CONFIG_CEPH_LIB) += libceph.o
libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
mon_client.o \
cls_lock_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
debugfs.o \
auth.o auth_none.o \

Просмотреть файл

@ -82,7 +82,10 @@ void ceph_auth_reset(struct ceph_auth_client *ac)
mutex_unlock(&ac->mutex);
}
int ceph_entity_name_encode(const char *name, void **p, void *end)
/*
* EntityName, not to be confused with entity_name_t
*/
int ceph_auth_entity_name_encode(const char *name, void **p, void *end)
{
int len = strlen(name);
@ -111,7 +114,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
monhdr->session_mon = cpu_to_le16(-1);
monhdr->session_mon_tid = 0;
ceph_encode_32(&p, 0); /* no protocol, yet */
ceph_encode_32(&p, CEPH_AUTH_UNKNOWN); /* no protocol, yet */
lenp = p;
p += sizeof(u32);
@ -124,7 +127,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
for (i = 0; i < num; i++)
ceph_encode_32(&p, supported_protocols[i]);
ret = ceph_entity_name_encode(ac->name, &p, end);
ret = ceph_auth_entity_name_encode(ac->name, &p, end);
if (ret < 0)
goto out;
ceph_decode_need(&p, end, sizeof(u64), bad);
@ -259,9 +262,7 @@ int ceph_build_auth(struct ceph_auth_client *ac,
int ret = 0;
mutex_lock(&ac->mutex);
if (!ac->protocol)
ret = ceph_auth_build_hello(ac, msg_buf, msg_len);
else if (ac->ops->should_authenticate(ac))
if (ac->ops->should_authenticate(ac))
ret = ceph_build_auth_request(ac, msg_buf, msg_len);
mutex_unlock(&ac->mutex);
return ret;

Просмотреть файл

@ -46,7 +46,7 @@ static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac,
int ret;
ceph_encode_8_safe(&p, end, 1, e_range);
ret = ceph_entity_name_encode(ac->name, &p, end);
ret = ceph_auth_entity_name_encode(ac->name, &p, end);
if (ret < 0)
return ret;

Просмотреть файл

@ -566,11 +566,17 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
}
EXPORT_SYMBOL(ceph_print_client_options);
u64 ceph_client_id(struct ceph_client *client)
struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client)
{
return &client->msgr.inst.addr;
}
EXPORT_SYMBOL(ceph_client_addr);
u64 ceph_client_gid(struct ceph_client *client)
{
return client->monc.auth->global_id;
}
EXPORT_SYMBOL(ceph_client_id);
EXPORT_SYMBOL(ceph_client_gid);
/*
* create a fresh client instance
@ -685,7 +691,8 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
return client->auth_err;
}
pr_info("client%llu fsid %pU\n", ceph_client_id(client), &client->fsid);
pr_info("client%llu fsid %pU\n", ceph_client_gid(client),
&client->fsid);
ceph_debugfs_client_init(client);
return 0;

Просмотреть файл

@ -15,6 +15,7 @@ const char *ceph_entity_type_name(int type)
default: return "unknown";
}
}
EXPORT_SYMBOL(ceph_entity_type_name);
const char *ceph_osd_op_name(int op)
{

325
net/ceph/cls_lock_client.c Normal file
Просмотреть файл

@ -0,0 +1,325 @@
#include <linux/ceph/ceph_debug.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/ceph/cls_lock_client.h>
#include <linux/ceph/decode.h>
/**
* ceph_cls_lock - grab rados lock for object
* @oid, @oloc: object to lock
* @lock_name: the name of the lock
* @type: lock type (CEPH_CLS_LOCK_EXCLUSIVE or CEPH_CLS_LOCK_SHARED)
* @cookie: user-defined identifier for this instance of the lock
* @tag: user-defined tag
* @desc: user-defined lock description
* @flags: lock flags
*
* All operations on the same lock should use the same tag.
*/
int ceph_cls_lock(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, u8 type, char *cookie,
char *tag, char *desc, u8 flags)
{
int lock_op_buf_size;
int name_len = strlen(lock_name);
int cookie_len = strlen(cookie);
int tag_len = strlen(tag);
int desc_len = strlen(desc);
void *p, *end;
struct page *lock_op_page;
struct timespec mtime;
int ret;
lock_op_buf_size = name_len + sizeof(__le32) +
cookie_len + sizeof(__le32) +
tag_len + sizeof(__le32) +
desc_len + sizeof(__le32) +
sizeof(struct ceph_timespec) +
/* flag and type */
sizeof(u8) + sizeof(u8) +
CEPH_ENCODING_START_BLK_LEN;
if (lock_op_buf_size > PAGE_SIZE)
return -E2BIG;
lock_op_page = alloc_page(GFP_NOIO);
if (!lock_op_page)
return -ENOMEM;
p = page_address(lock_op_page);
end = p + lock_op_buf_size;
/* encode cls_lock_lock_op struct */
ceph_start_encoding(&p, 1, 1,
lock_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
ceph_encode_string(&p, end, lock_name, name_len);
ceph_encode_8(&p, type);
ceph_encode_string(&p, end, cookie, cookie_len);
ceph_encode_string(&p, end, tag, tag_len);
ceph_encode_string(&p, end, desc, desc_len);
/* only support infinite duration */
memset(&mtime, 0, sizeof(mtime));
ceph_encode_timespec(p, &mtime);
p += sizeof(struct ceph_timespec);
ceph_encode_8(&p, flags);
dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n",
__func__, lock_name, type, cookie, tag, desc, flags);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock",
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
lock_op_page, lock_op_buf_size, NULL, NULL);
dout("%s: status %d\n", __func__, ret);
__free_page(lock_op_page);
return ret;
}
EXPORT_SYMBOL(ceph_cls_lock);
/**
* ceph_cls_unlock - release rados lock for object
* @oid, @oloc: object to lock
* @lock_name: the name of the lock
* @cookie: user-defined identifier for this instance of the lock
*/
int ceph_cls_unlock(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, char *cookie)
{
int unlock_op_buf_size;
int name_len = strlen(lock_name);
int cookie_len = strlen(cookie);
void *p, *end;
struct page *unlock_op_page;
int ret;
unlock_op_buf_size = name_len + sizeof(__le32) +
cookie_len + sizeof(__le32) +
CEPH_ENCODING_START_BLK_LEN;
if (unlock_op_buf_size > PAGE_SIZE)
return -E2BIG;
unlock_op_page = alloc_page(GFP_NOIO);
if (!unlock_op_page)
return -ENOMEM;
p = page_address(unlock_op_page);
end = p + unlock_op_buf_size;
/* encode cls_lock_unlock_op struct */
ceph_start_encoding(&p, 1, 1,
unlock_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
ceph_encode_string(&p, end, lock_name, name_len);
ceph_encode_string(&p, end, cookie, cookie_len);
dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock",
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
unlock_op_page, unlock_op_buf_size, NULL, NULL);
dout("%s: status %d\n", __func__, ret);
__free_page(unlock_op_page);
return ret;
}
EXPORT_SYMBOL(ceph_cls_unlock);
/**
* ceph_cls_break_lock - release rados lock for object for specified client
* @oid, @oloc: object to lock
* @lock_name: the name of the lock
* @cookie: user-defined identifier for this instance of the lock
* @locker: current lock owner
*/
int ceph_cls_break_lock(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, char *cookie,
struct ceph_entity_name *locker)
{
int break_op_buf_size;
int name_len = strlen(lock_name);
int cookie_len = strlen(cookie);
struct page *break_op_page;
void *p, *end;
int ret;
break_op_buf_size = name_len + sizeof(__le32) +
cookie_len + sizeof(__le32) +
sizeof(u8) + sizeof(__le64) +
CEPH_ENCODING_START_BLK_LEN;
if (break_op_buf_size > PAGE_SIZE)
return -E2BIG;
break_op_page = alloc_page(GFP_NOIO);
if (!break_op_page)
return -ENOMEM;
p = page_address(break_op_page);
end = p + break_op_buf_size;
/* encode cls_lock_break_op struct */
ceph_start_encoding(&p, 1, 1,
break_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
ceph_encode_string(&p, end, lock_name, name_len);
ceph_encode_copy(&p, locker, sizeof(*locker));
ceph_encode_string(&p, end, cookie, cookie_len);
dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name,
cookie, ENTITY_NAME(*locker));
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock",
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
break_op_page, break_op_buf_size, NULL, NULL);
dout("%s: status %d\n", __func__, ret);
__free_page(break_op_page);
return ret;
}
EXPORT_SYMBOL(ceph_cls_break_lock);
void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers)
{
int i;
for (i = 0; i < num_lockers; i++)
kfree(lockers[i].id.cookie);
kfree(lockers);
}
EXPORT_SYMBOL(ceph_free_lockers);
static int decode_locker(void **p, void *end, struct ceph_locker *locker)
{
u8 struct_v;
u32 len;
char *s;
int ret;
ret = ceph_start_decoding(p, end, 1, "locker_id_t", &struct_v, &len);
if (ret)
return ret;
ceph_decode_copy(p, &locker->id.name, sizeof(locker->id.name));
s = ceph_extract_encoded_string(p, end, NULL, GFP_NOIO);
if (IS_ERR(s))
return PTR_ERR(s);
locker->id.cookie = s;
ret = ceph_start_decoding(p, end, 1, "locker_info_t", &struct_v, &len);
if (ret)
return ret;
*p += sizeof(struct ceph_timespec); /* skip expiration */
ceph_decode_copy(p, &locker->info.addr, sizeof(locker->info.addr));
ceph_decode_addr(&locker->info.addr);
len = ceph_decode_32(p);
*p += len; /* skip description */
dout("%s %s%llu cookie %s addr %s\n", __func__,
ENTITY_NAME(locker->id.name), locker->id.cookie,
ceph_pr_addr(&locker->info.addr.in_addr));
return 0;
}
static int decode_lockers(void **p, void *end, u8 *type, char **tag,
struct ceph_locker **lockers, u32 *num_lockers)
{
u8 struct_v;
u32 struct_len;
char *s;
int i;
int ret;
ret = ceph_start_decoding(p, end, 1, "cls_lock_get_info_reply",
&struct_v, &struct_len);
if (ret)
return ret;
*num_lockers = ceph_decode_32(p);
*lockers = kcalloc(*num_lockers, sizeof(**lockers), GFP_NOIO);
if (!*lockers)
return -ENOMEM;
for (i = 0; i < *num_lockers; i++) {
ret = decode_locker(p, end, *lockers + i);
if (ret)
goto err_free_lockers;
}
*type = ceph_decode_8(p);
s = ceph_extract_encoded_string(p, end, NULL, GFP_NOIO);
if (IS_ERR(s)) {
ret = PTR_ERR(s);
goto err_free_lockers;
}
*tag = s;
return 0;
err_free_lockers:
ceph_free_lockers(*lockers, *num_lockers);
return ret;
}
/*
* On success, the caller is responsible for:
*
* kfree(tag);
* ceph_free_lockers(lockers, num_lockers);
*/
int ceph_cls_lock_info(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, u8 *type, char **tag,
struct ceph_locker **lockers, u32 *num_lockers)
{
int get_info_op_buf_size;
int name_len = strlen(lock_name);
struct page *get_info_op_page, *reply_page;
size_t reply_len;
void *p, *end;
int ret;
get_info_op_buf_size = name_len + sizeof(__le32) +
CEPH_ENCODING_START_BLK_LEN;
if (get_info_op_buf_size > PAGE_SIZE)
return -E2BIG;
get_info_op_page = alloc_page(GFP_NOIO);
if (!get_info_op_page)
return -ENOMEM;
reply_page = alloc_page(GFP_NOIO);
if (!reply_page) {
__free_page(get_info_op_page);
return -ENOMEM;
}
p = page_address(get_info_op_page);
end = p + get_info_op_buf_size;
/* encode cls_lock_get_info_op struct */
ceph_start_encoding(&p, 1, 1,
get_info_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
ceph_encode_string(&p, end, lock_name, name_len);
dout("%s lock_name %s\n", __func__, lock_name);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "get_info",
CEPH_OSD_FLAG_READ, get_info_op_page,
get_info_op_buf_size, reply_page, &reply_len);
dout("%s: status %d\n", __func__, ret);
if (ret >= 0) {
p = page_address(reply_page);
end = p + reply_len;
ret = decode_lockers(&p, end, type, tag, lockers, num_lockers);
}
__free_page(get_info_op_page);
__free_page(reply_page);
return ret;
}
EXPORT_SYMBOL(ceph_cls_lock_info);

Просмотреть файл

@ -245,7 +245,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
/* compute 2^44*log2(input+1) */
static __u64 crush_ln(unsigned int xin)
{
unsigned int x = xin, x1;
unsigned int x = xin;
int iexpon, index1, index2;
__u64 RH, LH, LL, xl64, result;
@ -253,9 +253,15 @@ static __u64 crush_ln(unsigned int xin)
/* normalize input */
iexpon = 15;
while (!(x & 0x18000)) {
x <<= 1;
iexpon--;
/*
* figure out number of bits we need to shift and
* do it in one step instead of iteratively
*/
if (!(x & 0x18000)) {
int bits = __builtin_clz(x & 0x1FFFF) - 16;
x <<= bits;
iexpon = 15 - bits;
}
index1 = (x >> 8) << 1;
@ -267,12 +273,11 @@ static __u64 crush_ln(unsigned int xin)
/* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */
xl64 = (__s64)x * RH;
xl64 >>= 48;
x1 = xl64;
result = iexpon;
result <<= (12 + 32);
index2 = x1 & 0xff;
index2 = xl64 & 0xff;
/* LL ~ 2^48*log2(1.0+index2/2^15) */
LL = __LL_tbl[index2];

Просмотреть файл

@ -835,6 +835,83 @@ int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what,
}
EXPORT_SYMBOL(ceph_monc_get_version_async);
static void handle_command_ack(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
struct ceph_mon_generic_request *req;
void *p = msg->front.iov_base;
void *const end = p + msg->front_alloc_len;
u64 tid = le64_to_cpu(msg->hdr.tid);
dout("%s msg %p tid %llu\n", __func__, msg, tid);
ceph_decode_need(&p, end, sizeof(struct ceph_mon_request_header) +
sizeof(u32), bad);
p += sizeof(struct ceph_mon_request_header);
mutex_lock(&monc->mutex);
req = lookup_generic_request(&monc->generic_request_tree, tid);
if (!req) {
mutex_unlock(&monc->mutex);
return;
}
req->result = ceph_decode_32(&p);
__finish_generic_request(req);
mutex_unlock(&monc->mutex);
complete_generic_request(req);
return;
bad:
pr_err("corrupt mon_command ack, tid %llu\n", tid);
ceph_msg_dump(msg);
}
int ceph_monc_blacklist_add(struct ceph_mon_client *monc,
struct ceph_entity_addr *client_addr)
{
struct ceph_mon_generic_request *req;
struct ceph_mon_command *h;
int ret = -ENOMEM;
int len;
req = alloc_generic_request(monc, GFP_NOIO);
if (!req)
goto out;
req->request = ceph_msg_new(CEPH_MSG_MON_COMMAND, 256, GFP_NOIO, true);
if (!req->request)
goto out;
req->reply = ceph_msg_new(CEPH_MSG_MON_COMMAND_ACK, 512, GFP_NOIO,
true);
if (!req->reply)
goto out;
mutex_lock(&monc->mutex);
register_generic_request(req);
h = req->request->front.iov_base;
h->monhdr.have_version = 0;
h->monhdr.session_mon = cpu_to_le16(-1);
h->monhdr.session_mon_tid = 0;
h->fsid = monc->monmap->fsid;
h->num_strs = cpu_to_le32(1);
len = sprintf(h->str, "{ \"prefix\": \"osd blacklist\", \
\"blacklistop\": \"add\", \
\"addr\": \"%pISpc/%u\" }",
&client_addr->in_addr, le32_to_cpu(client_addr->nonce));
h->str_len = cpu_to_le32(len);
send_generic_request(monc, req);
mutex_unlock(&monc->mutex);
ret = wait_generic_request(req);
out:
put_generic_request(req);
return ret;
}
EXPORT_SYMBOL(ceph_monc_blacklist_add);
/*
* Resend pending generic requests.
*/
@ -1139,6 +1216,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
handle_get_version_reply(monc, msg);
break;
case CEPH_MSG_MON_COMMAND_ACK:
handle_command_ack(monc, msg);
break;
case CEPH_MSG_MON_MAP:
ceph_monc_handle_map(monc, msg);
break;
@ -1178,6 +1259,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
m = ceph_msg_get(monc->m_subscribe_ack);
break;
case CEPH_MSG_STATFS_REPLY:
case CEPH_MSG_MON_COMMAND_ACK:
return get_generic_reply(con, hdr, skip);
case CEPH_MSG_AUTH_REPLY:
m = ceph_msg_get(monc->m_auth_reply);

Просмотреть файл

@ -338,6 +338,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
ceph_osd_data_release(&op->notify.request_data);
ceph_osd_data_release(&op->notify.response_data);
break;
case CEPH_OSD_OP_LIST_WATCHERS:
ceph_osd_data_release(&op->list_watchers.response_data);
break;
default:
break;
}
@ -863,6 +866,8 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
case CEPH_OSD_OP_NOTIFY:
dst->notify.cookie = cpu_to_le64(src->notify.cookie);
break;
case CEPH_OSD_OP_LIST_WATCHERS:
break;
case CEPH_OSD_OP_SETALLOCHINT:
dst->alloc_hint.expected_object_size =
cpu_to_le64(src->alloc_hint.expected_object_size);
@ -1445,6 +1450,10 @@ static void setup_request_data(struct ceph_osd_request *req,
ceph_osdc_msg_data_add(req->r_reply,
&op->extent.osd_data);
break;
case CEPH_OSD_OP_LIST_WATCHERS:
ceph_osdc_msg_data_add(req->r_reply,
&op->list_watchers.response_data);
break;
/* both */
case CEPH_OSD_OP_CALL:
@ -3891,12 +3900,121 @@ int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
return ret;
}
static int decode_watcher(void **p, void *end, struct ceph_watch_item *item)
{
u8 struct_v;
u32 struct_len;
int ret;
ret = ceph_start_decoding(p, end, 2, "watch_item_t",
&struct_v, &struct_len);
if (ret)
return ret;
ceph_decode_copy(p, &item->name, sizeof(item->name));
item->cookie = ceph_decode_64(p);
*p += 4; /* skip timeout_seconds */
if (struct_v >= 2) {
ceph_decode_copy(p, &item->addr, sizeof(item->addr));
ceph_decode_addr(&item->addr);
}
dout("%s %s%llu cookie %llu addr %s\n", __func__,
ENTITY_NAME(item->name), item->cookie,
ceph_pr_addr(&item->addr.in_addr));
return 0;
}
static int decode_watchers(void **p, void *end,
struct ceph_watch_item **watchers,
u32 *num_watchers)
{
u8 struct_v;
u32 struct_len;
int i;
int ret;
ret = ceph_start_decoding(p, end, 1, "obj_list_watch_response_t",
&struct_v, &struct_len);
if (ret)
return ret;
*num_watchers = ceph_decode_32(p);
*watchers = kcalloc(*num_watchers, sizeof(**watchers), GFP_NOIO);
if (!*watchers)
return -ENOMEM;
for (i = 0; i < *num_watchers; i++) {
ret = decode_watcher(p, end, *watchers + i);
if (ret) {
kfree(*watchers);
return ret;
}
}
return 0;
}
/*
* On success, the caller is responsible for:
*
* kfree(watchers);
*/
int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
struct ceph_watch_item **watchers,
u32 *num_watchers)
{
struct ceph_osd_request *req;
struct page **pages;
int ret;
req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
if (!req)
return -ENOMEM;
ceph_oid_copy(&req->r_base_oid, oid);
ceph_oloc_copy(&req->r_base_oloc, oloc);
req->r_flags = CEPH_OSD_FLAG_READ;
ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
if (ret)
goto out_put_req;
pages = ceph_alloc_page_vector(1, GFP_NOIO);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
goto out_put_req;
}
osd_req_op_init(req, 0, CEPH_OSD_OP_LIST_WATCHERS, 0);
ceph_osd_data_pages_init(osd_req_op_data(req, 0, list_watchers,
response_data),
pages, PAGE_SIZE, 0, false, true);
ceph_osdc_start_request(osdc, req, false);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
void *p = page_address(pages[0]);
void *const end = p + req->r_ops[0].outdata_len;
ret = decode_watchers(&p, end, watchers, num_watchers);
}
out_put_req:
ceph_osdc_put_request(req);
return ret;
}
EXPORT_SYMBOL(ceph_osdc_list_watchers);
/*
* Call all pending notify callbacks - for use after a watch is
* unregistered, to make sure no more callbacks for it will be invoked
*/
void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
{
dout("%s osdc %p\n", __func__, osdc);
flush_workqueue(osdc->notify_wq);
}
EXPORT_SYMBOL(ceph_osdc_flush_notifies);
@ -3909,6 +4027,57 @@ void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc)
}
EXPORT_SYMBOL(ceph_osdc_maybe_request_map);
/*
* Execute an OSD class method on an object.
*
* @flags: CEPH_OSD_FLAG_*
* @resp_len: out param for reply length
*/
int ceph_osdc_call(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
const char *class, const char *method,
unsigned int flags,
struct page *req_page, size_t req_len,
struct page *resp_page, size_t *resp_len)
{
struct ceph_osd_request *req;
int ret;
req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
if (!req)
return -ENOMEM;
ceph_oid_copy(&req->r_base_oid, oid);
ceph_oloc_copy(&req->r_base_oloc, oloc);
req->r_flags = flags;
ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
if (ret)
goto out_put_req;
osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method);
if (req_page)
osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len,
0, false, false);
if (resp_page)
osd_req_op_cls_response_data_pages(req, 0, &resp_page,
PAGE_SIZE, 0, false, false);
ceph_osdc_start_request(osdc, req, false);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
ret = req->r_ops[0].rval;
if (resp_page)
*resp_len = req->r_ops[0].outdata_len;
}
out_put_req:
ceph_osdc_put_request(req);
return ret;
}
EXPORT_SYMBOL(ceph_osdc_call);
/*
* init, shutdown
*/