libceph: force resend of osd requests if we skip an osdmap

If we skip over one or more osdmap epochs, we need to resend all osd requests,
because they may have been remapped to other servers and then back again
during the epochs we did not see.

Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
Sage Weil 2011-10-14 13:33:55 -07:00
Родитель ee3b56f265
Коммит 38d6453ca3
1 изменённых файлов: 16 добавлений и 10 удалений

Просмотреть файл

@ -943,7 +943,7 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger);
* Caller should hold map_sem for read and request_mutex. * Caller should hold map_sem for read and request_mutex.
*/ */
static int __map_request(struct ceph_osd_client *osdc, static int __map_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req) struct ceph_osd_request *req, int force_resend)
{ {
struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
struct ceph_pg pgid; struct ceph_pg pgid;
@ -967,7 +967,8 @@ static int __map_request(struct ceph_osd_client *osdc,
num = err; num = err;
} }
if ((req->r_osd && req->r_osd->o_osd == o && if ((!force_resend &&
req->r_osd && req->r_osd->o_osd == o &&
req->r_sent >= req->r_osd->o_incarnation && req->r_sent >= req->r_osd->o_incarnation &&
req->r_num_pg_osds == num && req->r_num_pg_osds == num &&
memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
@ -1289,18 +1290,18 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
* *
* Caller should hold map_sem for read and request_mutex. * Caller should hold map_sem for read and request_mutex.
*/ */
static void kick_requests(struct ceph_osd_client *osdc) static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
{ {
struct ceph_osd_request *req, *nreq; struct ceph_osd_request *req, *nreq;
struct rb_node *p; struct rb_node *p;
int needmap = 0; int needmap = 0;
int err; int err;
dout("kick_requests\n"); dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
mutex_lock(&osdc->request_mutex); mutex_lock(&osdc->request_mutex);
for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
req = rb_entry(p, struct ceph_osd_request, r_node); req = rb_entry(p, struct ceph_osd_request, r_node);
err = __map_request(osdc, req); err = __map_request(osdc, req, force_resend);
if (err < 0) if (err < 0)
continue; /* error */ continue; /* error */
if (req->r_osd == NULL) { if (req->r_osd == NULL) {
@ -1318,7 +1319,7 @@ static void kick_requests(struct ceph_osd_client *osdc)
r_linger_item) { r_linger_item) {
dout("linger req=%p req->r_osd=%p\n", req, req->r_osd); dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
err = __map_request(osdc, req); err = __map_request(osdc, req, force_resend);
if (err == 0) if (err == 0)
continue; /* no change and no osd was specified */ continue; /* no change and no osd was specified */
if (err < 0) if (err < 0)
@ -1395,7 +1396,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
ceph_osdmap_destroy(osdc->osdmap); ceph_osdmap_destroy(osdc->osdmap);
osdc->osdmap = newmap; osdc->osdmap = newmap;
} }
kick_requests(osdc); kick_requests(osdc, 0);
reset_changed_osds(osdc); reset_changed_osds(osdc);
} else { } else {
dout("ignoring incremental map %u len %d\n", dout("ignoring incremental map %u len %d\n",
@ -1423,6 +1424,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
"older than our %u\n", epoch, maplen, "older than our %u\n", epoch, maplen,
osdc->osdmap->epoch); osdc->osdmap->epoch);
} else { } else {
int skipped_map = 0;
dout("taking full map %u len %d\n", epoch, maplen); dout("taking full map %u len %d\n", epoch, maplen);
newmap = osdmap_decode(&p, p+maplen); newmap = osdmap_decode(&p, p+maplen);
if (IS_ERR(newmap)) { if (IS_ERR(newmap)) {
@ -1432,9 +1435,12 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
BUG_ON(!newmap); BUG_ON(!newmap);
oldmap = osdc->osdmap; oldmap = osdc->osdmap;
osdc->osdmap = newmap; osdc->osdmap = newmap;
if (oldmap) if (oldmap) {
if (oldmap->epoch + 1 < newmap->epoch)
skipped_map = 1;
ceph_osdmap_destroy(oldmap); ceph_osdmap_destroy(oldmap);
kick_requests(osdc); }
kick_requests(osdc, skipped_map);
} }
p += maplen; p += maplen;
nr_maps--; nr_maps--;
@ -1707,7 +1713,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
* the request still han't been touched yet. * the request still han't been touched yet.
*/ */
if (req->r_sent == 0) { if (req->r_sent == 0) {
rc = __map_request(osdc, req); rc = __map_request(osdc, req, 0);
if (rc < 0) { if (rc < 0) {
if (nofail) { if (nofail) {
dout("osdc_start_request failed map, " dout("osdc_start_request failed map, "