drbd: New packet for Ahead/Behind mode: P_OUT_OF_SYNC

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
Philipp Reisner 2010-10-27 14:33:00 +02:00
Родитель 67531718d8
Коммит 73a01a18b9
8 изменённых файлов: 91 добавлений и 19 удалений

Просмотреть файл

@ -1007,22 +1007,22 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
* called by tl_clear and drbd_send_dblock (==drbd_make_request).
* so this can be _any_ process.
*/
void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
const char *file, const unsigned int line)
{
unsigned long sbnr, ebnr, lbnr, flags;
sector_t esector, nr_sectors;
unsigned int enr, count;
unsigned int enr, count = 0;
struct lc_element *e;
if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
dev_err(DEV, "sector: %llus, size: %d\n",
(unsigned long long)sector, size);
return;
return 0;
}
if (!get_ldev(mdev))
return; /* no disk, no metadata, no bitmap to set bits in */
return 0; /* no disk, no metadata, no bitmap to set bits in */
nr_sectors = drbd_get_capacity(mdev->this_bdev);
esector = sector + (size >> 9) - 1;
@ -1052,6 +1052,8 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
out:
put_ldev(mdev);
return count;
}
static

Просмотреть файл

@ -212,6 +212,7 @@ enum drbd_packets {
/* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */
/* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */
P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */
P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */
P_MAX_CMD = 0x28,
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
@ -269,6 +270,7 @@ static inline const char *cmdname(enum drbd_packets cmd)
[P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
[P_COMPRESSED_BITMAP] = "CBitmap",
[P_DELAY_PROBE] = "DelayProbe",
[P_OUT_OF_SYNC] = "OutOfSync",
[P_MAX_CMD] = NULL,
};
@ -550,6 +552,13 @@ struct p_discard {
u32 pad;
} __packed;
/*
 * Describes a block range by position and length only (no payload data).
 * drbd_send_oos() sends it as the body of a P_OUT_OF_SYNC packet, and
 * receive_out_of_sync() reads it back from the data socket receive buffer.
 * Multi-byte fields are converted with cpu_to_be64()/cpu_to_be32() by the
 * sender and be64_to_cpu()/be32_to_cpu() by the receiver.
 */
struct p_block_desc {
/* packet header; drbd_send_oos() hands &head to drbd_send_cmd() */
struct p_header80 head;
/* start sector of the range (taken from req->sector by the sender) */
u64 sector;
/* length of the range (taken from req->size by the sender); the receiver
 * passes it as the "size" argument of drbd_set_out_of_sync() */
u32 blksize;
u32 pad; /* to multiple of 8 Byte */
} __packed;
/* Valid values for the encoding field.
* Bump proto version when changing this. */
enum drbd_bitmap_code {
@ -647,6 +656,7 @@ union p_polymorph {
struct p_block_req block_req;
struct p_delay_probe93 delay_probe93;
struct p_rs_uuid rs_uuid;
struct p_block_desc block_desc;
} __packed;
/**********************************************************************/
@ -1221,6 +1231,7 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
struct p_data *dp, int data_size);
extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
sector_t sector, int blksize, u64 block_id);
extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req);
extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
struct drbd_epoch_entry *e);
extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
@ -1534,6 +1545,7 @@ extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int);
extern void resync_timer_fn(unsigned long data);
@ -1626,7 +1638,7 @@ extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
int size, const char *file, const unsigned int line);
#define drbd_set_in_sync(mdev, sector, size) \
__drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__)
extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
int size, const char *file, const unsigned int line);
#define drbd_set_out_of_sync(mdev, sector, size) \
__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)

Просмотреть файл

@ -2634,6 +2634,16 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
return ok;
}
/**
 * drbd_send_oos() - Send a P_OUT_OF_SYNC packet for the range of a request
 * @mdev:	DRBD device.
 * @req:	Request whose sector and size are reported to the peer.
 *
 * Only the position and length of the request are transmitted, not its data.
 * The packet goes out on the data socket.
 *
 * Returns whatever drbd_send_cmd() returns.
 */
int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
{
	struct p_block_desc p;

	p.sector  = cpu_to_be64(req->sector);
	p.blksize = cpu_to_be32(req->size);
	/* p lives on the stack and all sizeof(p) bytes are sent: clear the
	 * padding so no uninitialized stack content ends up on the wire. */
	p.pad     = 0;

	/* NOTE(review): p.head is not set here; presumably drbd_send_cmd()
	 * fills in the header — confirm. */
	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
}
/*
drbd_send distinguishes two cases:

Просмотреть файл

@ -3562,6 +3562,15 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u
return TRUE;
}
static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
{
struct p_block_desc *p = &mdev->data.rbuf.block_desc;
drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
return TRUE;
}
typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
struct data_cmd {
@ -3592,6 +3601,7 @@ static struct data_cmd drbd_cmd_handler[] = {
[P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
[P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
[P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
/* anything missing from this table is in
* the asender_tbl, see get_asender_cmd */
[P_MAX_CMD] = { 0, 0, NULL },

Просмотреть файл

@ -142,7 +142,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
/* before we can signal completion to the upper layers,
* we may need to close the current epoch */
if (mdev->state.conn >= C_CONNECTED &&
if (mdev->state.conn >= C_CONNECTED && mdev->state.conn < C_AHEAD &&
req->epoch == mdev->newest_tle->br_number)
queue_barrier(mdev);
@ -545,6 +545,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;
case queue_for_send_oos:
req->rq_state |= RQ_NET_QUEUED;
req->w.cb = w_send_oos;
drbd_queue_work(&mdev->data.work, &req->w);
break;
case oos_handed_to_network:
/* actually the same */
case send_canceled:
/* treat it the same */
case send_failed:
@ -756,7 +764,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
const sector_t sector = bio->bi_sector;
struct drbd_tl_epoch *b = NULL;
struct drbd_request *req;
int local, remote;
int local, remote, send_oos = 0;
int err = -EIO;
int ret = 0;
@ -820,8 +828,11 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
}
remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
(mdev->state.pdsk == D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED));
(mdev->state.pdsk >= D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED &&
mdev->state.conn < C_AHEAD));
send_oos = (rw == WRITE && mdev->state.conn == C_AHEAD &&
mdev->state.pdsk >= D_INCONSISTENT);
if (!(local || remote) && !is_susp(mdev->state)) {
if (__ratelimit(&drbd_ratelimit_state))
@ -835,7 +846,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
* but there is a race between testing the bit and pointer outside the
* spinlock, and grabbing the spinlock.
* if we lost that race, we retry. */
if (rw == WRITE && remote &&
if (rw == WRITE && (remote || send_oos) &&
mdev->unused_spare_tle == NULL &&
test_bit(CREATE_BARRIER, &mdev->flags)) {
allocate_barrier:
@ -860,11 +871,15 @@ allocate_barrier:
goto fail_free_complete;
}
if (remote) {
if (remote || send_oos) {
remote = (mdev->state.pdsk == D_UP_TO_DATE ||
(mdev->state.pdsk == D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED));
if (!remote)
(mdev->state.pdsk >= D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED &&
mdev->state.conn < C_AHEAD));
send_oos = (rw == WRITE && mdev->state.conn == C_AHEAD &&
mdev->state.pdsk >= D_INCONSISTENT);
if (!(remote || send_oos))
dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
if (!(local || remote)) {
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
@ -877,7 +892,7 @@ allocate_barrier:
mdev->unused_spare_tle = b;
b = NULL;
}
if (rw == WRITE && remote &&
if (rw == WRITE && (remote || send_oos) &&
mdev->unused_spare_tle == NULL &&
test_bit(CREATE_BARRIER, &mdev->flags)) {
/* someone closed the current epoch
@ -900,7 +915,7 @@ allocate_barrier:
* barrier packet. To get the write ordering right, we only have to
* make sure that, if this is a write request and it triggered a
* barrier packet, this request is queued within the same spinlock. */
if (remote && mdev->unused_spare_tle &&
if ((remote || send_oos) && mdev->unused_spare_tle &&
test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
_tl_add_barrier(mdev, mdev->unused_spare_tle);
mdev->unused_spare_tle = NULL;
@ -948,8 +963,11 @@ allocate_barrier:
? queue_for_net_write
: queue_for_net_read);
}
if (send_oos && drbd_set_out_of_sync(mdev, sector, size))
_req_mod(req, queue_for_send_oos);
if (remote && mdev->net_conf->on_congestion != OC_BLOCK) {
if (remote &&
mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) {
int congested = 0;
if (mdev->net_conf->cong_fill &&
@ -964,6 +982,8 @@ allocate_barrier:
}
if (congested) {
queue_barrier(mdev);
if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
_drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
else /*mdev->net_conf->on_congestion == OC_DISCONNECT */

Просмотреть файл

@ -82,14 +82,16 @@ enum drbd_req_event {
to_be_submitted,
/* XXX yes, now I am inconsistent...
* these two are not "events" but "actions"
* these are not "events" but "actions"
* oh, well... */
queue_for_net_write,
queue_for_net_read,
queue_for_send_oos,
send_canceled,
send_failed,
handed_over_to_network,
oos_handed_to_network,
connection_lost_while_pending,
read_retry_remote_canceled,
recv_acked_by_peer,

Просмотреть файл

@ -1237,6 +1237,22 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
}
/**
 * w_send_oos() - Worker callback to send a P_OUT_OF_SYNC packet for a request
 * @mdev:	DRBD device.
 * @w:		Work object embedded in the struct drbd_request to report.
 * @cancel:	Non-zero if the work item is cancelled; nothing is sent then.
 *
 * On cancel the request gets the send_canceled event and 1 is returned.
 * Otherwise the packet is sent via drbd_send_oos(), the request gets the
 * oos_handed_to_network event regardless of the send result, and the
 * result of drbd_send_oos() is returned.
 */
int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	int rv;

	if (unlikely(cancel)) {
		req_mod(req, send_canceled);
		rv = 1;
	} else {
		/* send first, then notify the request state machine */
		rv = drbd_send_oos(mdev, req);
		req_mod(req, oos_handed_to_network);
	}

	return rv;
}
/**
* w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
* @mdev: DRBD device.

Просмотреть файл

@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.9"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 95
#define PRO_VERSION_MAX 96
enum drbd_io_error_p {