From d5675bd204efd87a174eeea592de23c4c4e7f908 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 24 Jun 2010 16:59:59 +0300 Subject: [PATCH 1/2] vhost: break out of polling loop on error When ring parsing fails, we currently handle this as a ring-empty condition. This means that we enable kicks and recheck ring empty: if it is not empty, we re-start polling which of course will fail again. Instead, let's return a negative error code and stop polling. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 12 ++++++++++-- drivers/vhost/vhost.c | 33 +++++++++++++++++---------------- drivers/vhost/vhost.h | 8 ++++---- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 0f41c9195e9b..54096eef4840 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -98,7 +98,8 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX]; - unsigned head, out, in, s; + unsigned out, in, s; + int head; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, @@ -135,6 +136,9 @@ static void handle_tx(struct vhost_net *net) ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); + /* On error, stop handling until the next kick. */ + if (head < 0) + break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { wmem = atomic_read(&sock->sk->sk_wmem_alloc); @@ -192,7 +196,8 @@ static void handle_tx(struct vhost_net *net) static void handle_rx(struct vhost_net *net) { struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; - unsigned head, out, in, log, s; + unsigned out, in, log, s; + int head; struct vhost_log *vq_log; struct msghdr msg = { .msg_name = NULL, @@ -228,6 +233,9 @@ static void handle_rx(struct vhost_net *net) ARRAY_SIZE(vq->iov), &out, &in, vq_log, &log); + /* On error, stop handling until the next kick. 
*/ + if (head < 0) + break; /* OK, now we need to know about added descriptors. */ if (head == vq->num) { if (unlikely(vhost_enable_notify(vq))) { diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 3b83382e06eb..5ccd384ec0be 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -873,12 +873,13 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, * number of output then some number of input descriptors, it's actually two * iovecs, but we pack them into one and note how many of each there were. * - * This function returns the descriptor number found, or vq->num (which - * is never a valid descriptor number) if none was found. */ -unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, - struct iovec iov[], unsigned int iov_size, - unsigned int *out_num, unsigned int *in_num, - struct vhost_log *log, unsigned int *log_num) + * This function returns the descriptor number found, or vq->num (which is + * never a valid descriptor number) if none was found. A negative code is + * returned on error. */ +int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num) { struct vring_desc desc; unsigned int i, head, found = 0; @@ -890,13 +891,13 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, if (get_user(vq->avail_idx, &vq->avail->idx)) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); - return vq->num; + return -EFAULT; } if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) { vq_err(vq, "Guest moved used index from %u to %u", last_avail_idx, vq->avail_idx); - return vq->num; + return -EFAULT; } /* If there's nothing new since last we looked, return invalid. 
*/ @@ -912,14 +913,14 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); - return vq->num; + return -EFAULT; } /* If their number is silly, that's an error. */ if (head >= vq->num) { vq_err(vq, "Guest says index %u > %u is available", head, vq->num); - return vq->num; + return -EINVAL; } /* When we start there are none of either input nor output. */ @@ -933,19 +934,19 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, if (i >= vq->num) { vq_err(vq, "Desc index is %u > %u, head = %u", i, vq->num, head); - return vq->num; + return -EINVAL; } if (++found > vq->num) { vq_err(vq, "Loop detected: last one at %u " "vq size %u head %u\n", i, vq->num, head); - return vq->num; + return -EINVAL; } ret = copy_from_user(&desc, vq->desc + i, sizeof desc); if (ret) { vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", i, vq->desc + i); - return vq->num; + return -EFAULT; } if (desc.flags & VRING_DESC_F_INDIRECT) { ret = get_indirect(dev, vq, iov, iov_size, @@ -954,7 +955,7 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, if (ret < 0) { vq_err(vq, "Failure detected " "in indirect descriptor at idx %d\n", i); - return vq->num; + return ret; } continue; } @@ -964,7 +965,7 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, if (ret < 0) { vq_err(vq, "Translation failure %d descriptor idx %d\n", ret, i); - return vq->num; + return ret; } if (desc.flags & VRING_DESC_F_WRITE) { /* If this is an input descriptor, @@ -981,7 +982,7 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, if (*in_num) { vq_err(vq, "Descriptor has out after in: " "idx %d\n", i); - return vq->num; + return -EINVAL; } *out_num += ret; } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 44591ba9b07a..11ee13dba0f7 100644 --- 
a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -120,10 +120,10 @@ long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg); int vhost_vq_access_ok(struct vhost_virtqueue *vq); int vhost_log_access_ok(struct vhost_dev *); -unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *, - struct iovec iov[], unsigned int iov_count, - unsigned int *out_num, unsigned int *in_num, - struct vhost_log *log, unsigned int *log_num); +int vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *, + struct iovec iov[], unsigned int iov_count, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num); void vhost_discard_vq_desc(struct vhost_virtqueue *); int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); From 7b3384fc30633738ae4eaf8e1bc6ce70470ced80 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Jul 2010 18:40:12 +0300 Subject: [PATCH 2/2] vhost: add unlikely annotations to error path patch 'break out of polling loop on error' caused a minor performance regression on my machine: recover that performance by adding a bunch of unlikely annotations in the error handling. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 4 ++-- drivers/vhost/vhost.c | 53 ++++++++++++++++++++++--------------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 54096eef4840..2406377a6e5e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -137,7 +137,7 @@ static void handle_tx(struct vhost_net *net) &out, &in, NULL, NULL); /* On error, stop handling until the next kick. */ - if (head < 0) + if (unlikely(head < 0)) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { @@ -234,7 +234,7 @@ static void handle_rx(struct vhost_net *net) &out, &in, vq_log, &log); /* On error, stop handling until the next kick. 
*/ - if (head < 0) + if (unlikely(head < 0)) break; /* OK, now we need to know about added descriptors. */ if (head == vq->num) { diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5ccd384ec0be..0b99783083f6 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -736,12 +736,12 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, mem = rcu_dereference(dev->memory); while ((u64)len > s) { u64 size; - if (ret >= iov_size) { + if (unlikely(ret >= iov_size)) { ret = -ENOBUFS; break; } reg = find_region(mem, addr, len); - if (!reg) { + if (unlikely(!reg)) { ret = -EFAULT; break; } @@ -780,18 +780,18 @@ static unsigned next_desc(struct vring_desc *desc) return next; } -static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, - struct iovec iov[], unsigned int iov_size, - unsigned int *out_num, unsigned int *in_num, - struct vhost_log *log, unsigned int *log_num, - struct vring_desc *indirect) +static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num, + struct vring_desc *indirect) { struct vring_desc desc; unsigned int i = 0, count, found = 0; int ret; /* Sanity check */ - if (indirect->len % sizeof desc) { + if (unlikely(indirect->len % sizeof desc)) { vq_err(vq, "Invalid length in indirect descriptor: " "len 0x%llx not multiple of 0x%zx\n", (unsigned long long)indirect->len, @@ -801,7 +801,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect, ARRAY_SIZE(vq->indirect)); - if (ret < 0) { + if (unlikely(ret < 0)) { vq_err(vq, "Translation failure %d in indirect.\n", ret); return ret; } @@ -813,7 +813,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, count = indirect->len / sizeof desc; /* Buffers are chained via a 16 
bit next field, so * we can have at most 2^16 of these. */ - if (count > USHRT_MAX + 1) { + if (unlikely(count > USHRT_MAX + 1)) { vq_err(vq, "Indirect buffer length too big: %d\n", indirect->len); return -E2BIG; @@ -821,19 +821,19 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, do { unsigned iov_count = *in_num + *out_num; - if (++found > count) { + if (unlikely(++found > count)) { vq_err(vq, "Loop detected: last one at %u " "indirect size %u\n", i, count); return -EINVAL; } - if (memcpy_fromiovec((unsigned char *)&desc, vq->indirect, - sizeof desc)) { + if (unlikely(memcpy_fromiovec((unsigned char *)&desc, vq->indirect, + sizeof desc))) { vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", i, (size_t)indirect->addr + i * sizeof desc); return -EINVAL; } - if (desc.flags & VRING_DESC_F_INDIRECT) { + if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) { vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", i, (size_t)indirect->addr + i * sizeof desc); return -EINVAL; @@ -841,7 +841,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, iov_size - iov_count); - if (ret < 0) { + if (unlikely(ret < 0)) { vq_err(vq, "Translation failure %d indirect idx %d\n", ret, i); return ret; @@ -857,7 +857,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, } else { /* If it's an output descriptor, they're all supposed * to come before any input descriptors. */ - if (*in_num) { + if (unlikely(*in_num)) { vq_err(vq, "Indirect descriptor " "has out after in: idx %d\n", i); return -EINVAL; @@ -888,13 +888,13 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* Check it isn't doing very strange things with descriptor numbers. 
*/ last_avail_idx = vq->last_avail_idx; - if (get_user(vq->avail_idx, &vq->avail->idx)) { + if (unlikely(get_user(vq->avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; } - if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) { + if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { vq_err(vq, "Guest moved used index from %u to %u", last_avail_idx, vq->avail_idx); return -EFAULT; @@ -909,7 +909,8 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (get_user(head, &vq->avail->ring[last_avail_idx % vq->num])) { + if (unlikely(get_user(head, + &vq->avail->ring[last_avail_idx % vq->num]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); @@ -917,7 +918,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, } /* If their number is silly, that's an error. 
*/ - if (head >= vq->num) { + if (unlikely(head >= vq->num)) { vq_err(vq, "Guest says index %u > %u is available", head, vq->num); return -EINVAL; @@ -931,19 +932,19 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, i = head; do { unsigned iov_count = *in_num + *out_num; - if (i >= vq->num) { + if (unlikely(i >= vq->num)) { vq_err(vq, "Desc index is %u > %u, head = %u", i, vq->num, head); return -EINVAL; } - if (++found > vq->num) { + if (unlikely(++found > vq->num)) { vq_err(vq, "Loop detected: last one at %u " "vq size %u head %u\n", i, vq->num, head); return -EINVAL; } ret = copy_from_user(&desc, vq->desc + i, sizeof desc); - if (ret) { + if (unlikely(ret)) { vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", i, vq->desc + i); return -EFAULT; @@ -952,7 +953,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, ret = get_indirect(dev, vq, iov, iov_size, out_num, in_num, log, log_num, &desc); - if (ret < 0) { + if (unlikely(ret < 0)) { vq_err(vq, "Failure detected " "in indirect descriptor at idx %d\n", i); return ret; @@ -962,7 +963,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, iov_size - iov_count); - if (ret < 0) { + if (unlikely(ret < 0)) { vq_err(vq, "Translation failure %d descriptor idx %d\n", ret, i); return ret; @@ -979,7 +980,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, } else { /* If it's an output descriptor, they're all supposed * to come before any input descriptors. */ - if (*in_num) { + if (unlikely(*in_num)) { vq_err(vq, "Descriptor has out after in: " "idx %d\n", i); return -EINVAL;