From 078adb3bf43388d4e1c8f1a63b14f2629f2ad995 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sat, 17 Sep 2022 16:38:03 +0800 Subject: [PATCH 01/17] vhost: add __init/__exit annotations to module init/exit funcs Add missing __init/__exit annotations to module init/exit funcs. Signed-off-by: Xiu Jianfeng Message-Id: <20220917083803.21521-1-xiujianfeng@huawei.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 68e4ecd1cc0e..1cab1a831bb6 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1781,7 +1781,7 @@ static struct miscdevice vhost_net_misc = { .fops = &vhost_net_fops, }; -static int vhost_net_init(void) +static int __init vhost_net_init(void) { if (experimental_zcopytx) vhost_net_enable_zcopy(VHOST_NET_VQ_TX); @@ -1789,7 +1789,7 @@ static int vhost_net_init(void) } module_init(vhost_net_init); -static void vhost_net_exit(void) +static void __exit vhost_net_exit(void) { misc_deregister(&vhost_net_misc); } From bdeb2f9836c4fd323d87fad4d7a8abd00746c359 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Sun, 25 Sep 2022 22:22:02 -0400 Subject: [PATCH 02/17] virtio_ring: split: Operators use unified style The operators of vring_alloc_queue_split should use the unified style.Add space for the '|' ,make it be looked more pretty. Signed-off-by: Deming Wang Message-Id: <20220926022202.1516-1-wangdeming@inspur.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 4620e9d79dde..ac68bc0e3750 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1066,7 +1066,7 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, if (!queue) { /* Try to get a single page. You are my only hope! */ queue = vring_alloc_queue(vdev, vring_size(num, vring_align), - &dma_addr, GFP_KERNEL|__GFP_ZERO); + &dma_addr, GFP_KERNEL | __GFP_ZERO); } if (!queue) return -ENOMEM; From f7adf38928301576193910c94ab575804b81cf73 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Mon, 26 Sep 2022 14:33:06 -0400 Subject: [PATCH 03/17] virtio_ring: make vring_alloc_queue_packed prettier Add some spaces to vring_alloc_queue(make it look prettier). Signed-off-by: Deming Wang Message-Id: <20220926183306.4535-1-wangdeming@inspur.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index ac68bc0e3750..af16a7e8c67e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1867,7 +1867,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, ring = vring_alloc_queue(vdev, ring_size_in_bytes, &ring_dma_addr, - GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); if (!ring) goto err; @@ -1879,7 +1879,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, driver = vring_alloc_queue(vdev, event_size_in_bytes, &driver_event_dma_addr, - GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); if (!driver) goto err; @@ -1889,7 +1889,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, device = vring_alloc_queue(vdev, event_size_in_bytes, &device_event_dma_addr, - GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); if (!device) goto err; From cdbd952bb7b5fca36676b3d318796b196b127397 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 15 Aug 2022 18:04:20 -0400 Subject: [PATCH 04/17] virtio: drop vp_legacy_set_queue_size There's actually no way to set queue size on legacy virtio pci. Signed-off-by: Michael S. Tsirkin Message-Id: <20220815220447.155860-1-mst@redhat.com> --- include/linux/virtio_pci_legacy.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/virtio_pci_legacy.h b/include/linux/virtio_pci_legacy.h index e5d665faf00e..a8dc757d0367 100644 --- a/include/linux/virtio_pci_legacy.h +++ b/include/linux/virtio_pci_legacy.h @@ -32,8 +32,6 @@ void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, u16 index, u32 queue_pfn); bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, u16 idx); -void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev, - u16 idx, u16 size); u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, u16 idx); int vp_legacy_probe(struct virtio_pci_legacy_device *ldev); From 46cd26f4108714d2e28c93d773e1602917ac0691 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Wed, 14 Sep 2022 17:49:10 +0300 Subject: [PATCH 05/17] virtio-net: introduce and use helper function for guest gso support checks Probe routine is already several hundred lines. Use helper function for guest gso support check. Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Reviewed-by: Parav Pandit Reviewed-by: Xuan Zhuo Reviewed-by: Si-Wei Liu Message-Id: <20220914144911.56422-2-gavinl@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9cce7dec7366..f831a0290998 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3682,6 +3682,14 @@ static int virtnet_validate(struct virtio_device *vdev) return 0; } +static bool virtnet_check_guest_gso(const struct virtnet_info *vi) +{ + return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO); +} + static int virtnet_probe(struct virtio_device *vdev) { int i, err = -ENOMEM; @@ -3777,10 +3785,7 @@ static int virtnet_probe(struct virtio_device *vdev) spin_lock_init(&vi->refill_lock); /* If we can receive ANY GSO packets, we must allocate large ones. */ - if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || - virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) || - virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) || - virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO)) + if (virtnet_check_guest_gso(vi)) vi->big_packets = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) From 4959aebba8c06992abafa09d1e80965e0825af54 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Wed, 14 Sep 2022 17:49:11 +0300 Subject: [PATCH 06/17] virtio-net: use mtu size as buffer length for big packets Currently add_recvbuf_big() allocates MAX_SKB_FRAGS segments for big packets even when GUEST_* offloads are not present on the device. However, if guest GSO is not supported, it would be sufficient to allocate segments to cover just up the MTU size and no further. Allocating the maximum amount of segments results in a large waste of buffer space in the queue, which limits the number of packets that can be buffered and can result in reduced performance. Therefore, if guest GSO is not supported, use the MTU to calculate the optimal amount of segments required. Below is the iperf TCP test results over a Mellanox NIC, using vDPA for 1 VQ, queue size 1024, before and after the change, with the iperf server running over the virtio-net interface. MTU(Bytes)/Bandwidth (Gbit/s) Before After 1500 22.5 22.4 9000 12.8 25.9 And result of queue size 256. MTU(Bytes)/Bandwidth (Gbit/s) Before After 9000 2.15 11.9 With this patch no degradation is observed with multiple below tests and feature bit combinations. Results are summarized below for q depth of 1024. Interface MTU is 1500 if MTU feature is disabled. MTU is set to 9000 in other tests. Features/ Bandwidth (Gbit/s) Before After mtu off 20.1 20.2 mtu/indirect on 17.4 17.3 mtu/indirect/packed on 17.2 17.2 Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Reviewed-by: Parav Pandit Reviewed-by: Xuan Zhuo Reviewed-by: Si-Wei Liu Message-Id: <20220914144911.56422-3-gavinl@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/net/virtio_net.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f831a0290998..a8fa690b1195 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -225,6 +225,9 @@ struct virtnet_info { /* I like... big packets and I cannot lie! */ bool big_packets; + /* number of sg entries allocated for big packets */ + unsigned int big_packets_num_skbfrags; + /* Host will merge rx buffers for big packets (shake it! shake it!) */ bool mergeable_rx_bufs; @@ -1331,10 +1334,10 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, char *p; int i, err, offset; - sg_init_table(rq->sg, MAX_SKB_FRAGS + 2); + sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); - /* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */ - for (i = MAX_SKB_FRAGS + 1; i > 1; --i) { + /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ + for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { first = get_a_page(rq, gfp); if (!first) { if (list) @@ -1365,7 +1368,7 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, /* chain first in list head */ first->private = (unsigned long)list; - err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2, + err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, first, gfp); if (err < 0) give_pages(rq, first); @@ -3690,13 +3693,27 @@ static bool virtnet_check_guest_gso(const struct virtnet_info *vi) virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO); } +static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) +{ + bool guest_gso = virtnet_check_guest_gso(vi); + + /* If device can receive ANY guest GSO packets, regardless of mtu, + * allocate packets of maximum size, otherwise limit it to only + * mtu size worth only. + */ + if (mtu > ETH_DATA_LEN || guest_gso) { + vi->big_packets = true; + vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); + } +} + static int virtnet_probe(struct virtio_device *vdev) { int i, err = -ENOMEM; struct net_device *dev; struct virtnet_info *vi; u16 max_queue_pairs; - int mtu; + int mtu = 0; /* Find if host supports multiqueue/rss virtio_net device */ max_queue_pairs = 1; @@ -3784,10 +3801,6 @@ static int virtnet_probe(struct virtio_device *vdev) INIT_WORK(&vi->config_work, virtnet_config_changed_work); spin_lock_init(&vi->refill_lock); - /* If we can receive ANY GSO packets, we must allocate large ones. */ - if (virtnet_check_guest_gso(vi)) - vi->big_packets = true; - if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; @@ -3853,12 +3866,10 @@ static int virtnet_probe(struct virtio_device *vdev) dev->mtu = mtu; dev->max_mtu = mtu; - - /* TODO: size buffers correctly in this case. */ - if (dev->mtu > ETH_DATA_LEN) - vi->big_packets = true; } + virtnet_set_big_packets(vi, mtu); + if (vi->any_header_sg) dev->needed_headroom = vi->hdr_len; From 90fea5a800c3dd80fb8ad9a02929bcef5fde42b8 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 27 Sep 2022 15:48:08 +0800 Subject: [PATCH 07/17] vdpa: device feature provisioning This patch allows the device features to be provisioned through netlink. A new attribute is introduced to allow the userspace to pass a 64bit device features during device adding. This provides several advantages: - Allow to provision a subset of the features to ease the cross vendor live migration. - Better debug-ability for vDPA framework and parent. Reviewed-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20220927074810.28627-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 5 +++++ include/linux/vdpa.h | 1 + include/uapi/linux/vdpa.h | 2 ++ 3 files changed, 8 insertions(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index c06c02704461..278e26bfa492 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -600,6 +600,11 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i } config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); } + if (nl_attrs[VDPA_ATTR_DEV_FEATURES]) { + config.device_features = + nla_get_u64(nl_attrs[VDPA_ATTR_DEV_FEATURES]); + config.mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES); + } /* Skip checking capability if user didn't prefer to configure any * device networking attributes. It is likely that user might have used diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index d282f464d2f1..6d0f5e4e82c2 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -104,6 +104,7 @@ struct vdpa_iova_range { }; struct vdpa_dev_set_config { + u64 device_features; struct { u8 mac[ETH_ALEN]; u16 mtu; diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 25c55cab3d7c..9dc855f37c59 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -52,6 +52,8 @@ enum vdpa_attr { VDPA_ATTR_DEV_VENDOR_ATTR_NAME, /* string */ VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, /* u64 */ + VDPA_ATTR_DEV_FEATURES, /* u64 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; From 477f71971422fb225337b30f7b79363387b1d78c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 27 Sep 2022 15:48:09 +0800 Subject: [PATCH 08/17] vdpa_sim_net: support feature provisioning This patch implements features provisioning for vdpa_sim_net. 1) validating the provisioned features to be a subset of the parent features. 2) clearing the features that is not wanted by the userspace For example: vdpasim_net: supported_classes net max_supported_vqs 3 dev_features MTU MAC CTRL_VQ CTRL_MAC_ADDR ANY_LAYOUT VERSION_1 ACCESS_PLATFORM 1) provision vDPA device with all features that are supported by the net simulator dev1: mac 00:00:00:00:00:00 link up link_announce false mtu 1500 negotiated_features MTU MAC CTRL_VQ CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM 2) provision vDPA device with a subset of the features dev1: mac 00:00:00:00:00:00 link up link_announce false mtu 1500 negotiated_features CTRL_VQ VERSION_1 ACCESS_PLATFORM Reviewed-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20220927074810.28627-3-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 12 +++++++++++- drivers/vdpa/vdpa_sim/vdpa_sim.h | 3 ++- drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 2 +- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 5 +++-- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 225b7f5d8be3..b071f0d842fb 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "vdpa_sim.h" @@ -245,13 +246,22 @@ static const struct dma_map_ops vdpasim_dma_ops = { static const struct vdpa_config_ops vdpasim_config_ops; static const struct vdpa_config_ops vdpasim_batch_config_ops; -struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) +struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, + const struct vdpa_dev_set_config *config) { const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; struct device *dev; int i, ret = -ENOMEM; + if (config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { + if (config->device_features & + ~dev_attr->supported_features) + return ERR_PTR(-EINVAL); + dev_attr->supported_features = + config->device_features; + } + if (batch_mapping) ops = &vdpasim_batch_config_ops; else diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h index 061986f30911..0e78737dcc16 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h @@ -71,7 +71,8 @@ struct vdpasim { spinlock_t iommu_lock; }; -struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr); +struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr, + const struct vdpa_dev_set_config *config); /* TODO: cross-endian support */ static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c index c8bfea3b7db2..c6db1a1baf76 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -383,7 +383,7 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, dev_attr.work_fn = vdpasim_blk_work; dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT; - simdev = vdpasim_create(&dev_attr); + simdev = vdpasim_create(&dev_attr, config); if (IS_ERR(simdev)) return PTR_ERR(simdev); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 886449e88502..c3cb225ea469 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -254,7 +254,7 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, dev_attr.work_fn = vdpasim_net_work; dev_attr.buffer_size = PAGE_SIZE; - simdev = vdpasim_create(&dev_attr); + simdev = vdpasim_create(&dev_attr, config); if (IS_ERR(simdev)) return PTR_ERR(simdev); @@ -294,7 +294,8 @@ static struct vdpa_mgmt_dev mgmt_dev = { .id_table = id_table, .ops = &vdpasim_net_mgmtdev_ops, .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR | - 1 << VDPA_ATTR_DEV_NET_CFG_MTU), + 1 << VDPA_ATTR_DEV_NET_CFG_MTU | + 1 << VDPA_ATTR_DEV_FEATURES), .max_supported_vqs = VDPASIM_NET_VQ_NUM, .supported_features = VDPASIM_NET_FEATURES, }; From c1ca352d371f724f7fb40f016abdb563aa85fe55 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 27 Sep 2022 15:48:10 +0800 Subject: [PATCH 09/17] vp_vdpa: support feature provisioning This patch allows the device features to be provisioned via netlink. This is done by: 1) validating the provisioned features to be a subset of the parent features. 2) clearing the features that is not wanted by the userspace For example: # vdpa mgmtdev show pci/0000:02:00.0: supported_classes net max_supported_vqs 3 dev_features CSUM GUEST_CSUM CTRL_GUEST_OFFLOADS MAC GUEST_TSO4 GUEST_TSO6 GUEST_ECN GUEST_UFO HOST_TSO4 HOST_TSO6 HOST_ECN HOST_UFO MRG_RXBUF STATUS CTRL_VQ CTRL_RX CTRL_VLAN CTRL_RX_EXTRA GUEST_ANNOUNCE CTRL_MAC_ADDR RING_INDIRECT_DESC RING_EVENT_IDX VERSION_1 ACCESS_PLATFORM 1) provision vDPA device with all features that are supported by the virtio-pci # vdpa dev add name dev1 mgmtdev pci/0000:02:00.0 # vdpa dev config show dev1: mac 52:54:00:12:34:56 link up link_announce false mtu 65535 negotiated_features CSUM GUEST_CSUM CTRL_GUEST_OFFLOADS MAC GUEST_TSO4 GUEST_TSO6 GUEST_ECN GUEST_UFO HOST_TSO4 HOST_TSO6 HOST_ECN HOST_UFO MRG_RXBUF STATUS CTRL_VQ CTRL_RX CTRL_VLAN GUEST_ANNOUNCE CTRL_MAC_ADDR RING_INDIRECT_DESC RING_EVENT_IDX VERSION_1 ACCESS_PLATFORM 2) provision vDPA device with a subset of the features # vdpa dev add name dev1 mgmtdev pci/0000:02:00.0 device_features 0x300020000 # dev1: mac 52:54:00:12:34:56 link up link_announce false mtu 65535 negotiated_features CTRL_VQ VERSION_1 ACCESS_PLATFORM Reviewed-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20220927074810.28627-4-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/virtio_pci/vp_vdpa.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 04522077735b..d448db0c4de3 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -17,6 +17,7 @@ #include #include #include +#include #define VP_VDPA_QUEUE_MAX 256 #define VP_VDPA_DRIVER_NAME "vp_vdpa" @@ -35,6 +36,7 @@ struct vp_vdpa { struct virtio_pci_modern_device *mdev; struct vp_vring *vring; struct vdpa_callback config_cb; + u64 device_features; char msix_name[VP_VDPA_NAME_SIZE]; int config_irq; int queues; @@ -66,9 +68,9 @@ static struct virtio_pci_modern_device *vp_vdpa_to_mdev(struct vp_vdpa *vp_vdpa) static u64 vp_vdpa_get_device_features(struct vdpa_device *vdpa) { - struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); - return vp_modern_get_features(mdev); + return vp_vdpa->device_features; } static int vp_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features) @@ -475,6 +477,7 @@ static int vp_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, struct pci_dev *pdev = mdev->pci_dev; struct device *dev = &pdev->dev; struct vp_vdpa *vp_vdpa = NULL; + u64 device_features; int ret, i; vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, @@ -491,6 +494,20 @@ static int vp_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, vp_vdpa->queues = vp_modern_get_num_queues(mdev); vp_vdpa->mdev = mdev; + device_features = vp_modern_get_features(mdev); + if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { + if (add_config->device_features & ~device_features) { + ret = -EINVAL; + dev_err(&pdev->dev, "Try to provision features " + "that are not supported by the device: " + "device_features 0x%llx provisioned 0x%llx\n", + device_features, add_config->device_features); + goto err; + } + device_features = add_config->device_features; + } + vp_vdpa->device_features = device_features; + ret = devm_add_action_or_reset(dev, vp_vdpa_free_irq_vectors, pdev); if (ret) { dev_err(&pdev->dev, @@ -599,6 +616,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) mgtdev->id_table = mdev_id; mgtdev->max_supported_vqs = vp_modern_get_num_queues(mdev); mgtdev->supported_features = vp_modern_get_features(mdev); + mgtdev->config_attr_mask = (1 << VDPA_ATTR_DEV_FEATURES); pci_set_master(pdev); pci_set_drvdata(pdev, vp_vdpa_mgtdev); From e60d64074214db7207fc13c25ee39d8d47cb4a34 Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Wed, 21 Sep 2022 11:27:29 +0300 Subject: [PATCH 10/17] virtio_blk: add SECURE ERASE command support Support for the VIRTIO_BLK_F_SECURE_ERASE VirtIO feature. A device that offers this feature can receive VIRTIO_BLK_T_SECURE_ERASE commands. A device which supports this feature has the following fields in the virtio config: - max_secure_erase_sectors - max_secure_erase_seg - secure_erase_sector_alignment max_secure_erase_sectors and secure_erase_sector_alignment are expressed in 512-byte units. Every secure erase command has the following fields: - sectors: The starting offset in 512-byte units. - num_sectors: The number of sectors. Signed-off-by: Alvaro Karsz Message-Id: <20220921082729.2516779-1-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- drivers/block/virtio_blk.c | 110 ++++++++++++++++++++++++++------ include/uapi/linux/virtio_blk.h | 19 ++++++ 2 files changed, 111 insertions(+), 18 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dd9a05174726..f03505a65d08 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -130,7 +130,7 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); } -static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) +static int virtblk_setup_discard_write_zeroes_erase(struct request *req, bool unmap) { unsigned short segments = blk_rq_nr_discard_segments(req); unsigned short n = 0; @@ -240,6 +240,9 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, type = VIRTIO_BLK_T_WRITE_ZEROES; unmap = !(req->cmd_flags & REQ_NOUNMAP); break; + case REQ_OP_SECURE_ERASE: + type = VIRTIO_BLK_T_SECURE_ERASE; + break; case REQ_OP_DRV_IN: type = VIRTIO_BLK_T_GET_ID; break; @@ -251,8 +254,9 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, vbr->out_hdr.type = cpu_to_virtio32(vdev, type); vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); - if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { - if (virtblk_setup_discard_write_zeroes(req, unmap)) + if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES || + type == VIRTIO_BLK_T_SECURE_ERASE) { + if (virtblk_setup_discard_write_zeroes_erase(req, unmap)) return BLK_STS_RESOURCE; } @@ -888,6 +892,8 @@ static int virtblk_probe(struct virtio_device *vdev) int err, index; u32 v, blk_size, max_size, sg_elems, opt_io_size; + u32 max_discard_segs = 0; + u32 discard_granularity = 0; u16 min_io_size; u8 physical_block_exp, alignment_offset; unsigned int queue_depth; @@ -1045,27 +1051,14 @@ static int virtblk_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { virtio_cread(vdev, struct virtio_blk_config, - discard_sector_alignment, &v); - if (v) - q->limits.discard_granularity = v << SECTOR_SHIFT; - else - q->limits.discard_granularity = blk_size; + discard_sector_alignment, &discard_granularity); virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v); blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, - &v); - - /* - * max_discard_seg == 0 is out of spec but we always - * handled it. - */ - if (!v) - v = sg_elems; - blk_queue_max_discard_segments(q, - min(v, MAX_DISCARD_SEGMENTS)); + &max_discard_segs); } if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { @@ -1074,6 +1067,85 @@ static int virtblk_probe(struct virtio_device *vdev) blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); } + /* The discard and secure erase limits are combined since the Linux + * block layer uses the same limit for both commands. + * + * If both VIRTIO_BLK_F_SECURE_ERASE and VIRTIO_BLK_F_DISCARD features + * are negotiated, we will use the minimum between the limits. + * + * discard sector alignment is set to the minimum between discard_sector_alignment + * and secure_erase_sector_alignment. + * + * max discard sectors is set to the minimum between max_discard_seg and + * max_secure_erase_seg. + */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) { + + virtio_cread(vdev, struct virtio_blk_config, + secure_erase_sector_alignment, &v); + + /* secure_erase_sector_alignment should not be zero, the device should set a + * valid number of sectors. + */ + if (!v) { + dev_err(&vdev->dev, + "virtio_blk: secure_erase_sector_alignment can't be 0\n"); + err = -EINVAL; + goto out_cleanup_disk; + } + + discard_granularity = min_not_zero(discard_granularity, v); + + virtio_cread(vdev, struct virtio_blk_config, + max_secure_erase_sectors, &v); + + /* max_secure_erase_sectors should not be zero, the device should set a + * valid number of sectors. + */ + if (!v) { + dev_err(&vdev->dev, + "virtio_blk: max_secure_erase_sectors can't be 0\n"); + err = -EINVAL; + goto out_cleanup_disk; + } + + blk_queue_max_secure_erase_sectors(q, v); + + virtio_cread(vdev, struct virtio_blk_config, + max_secure_erase_seg, &v); + + /* max_secure_erase_seg should not be zero, the device should set a + * valid number of segments + */ + if (!v) { + dev_err(&vdev->dev, + "virtio_blk: max_secure_erase_seg can't be 0\n"); + err = -EINVAL; + goto out_cleanup_disk; + } + + max_discard_segs = min_not_zero(max_discard_segs, v); + } + + if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD) || + virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) { + /* max_discard_seg and discard_granularity will be 0 only + * if max_discard_seg and discard_sector_alignment fields in the virtio + * config are 0 and VIRTIO_BLK_F_SECURE_ERASE feature is not negotiated. + * In this case, we use default values. + */ + if (!max_discard_segs) + max_discard_segs = sg_elems; + + blk_queue_max_discard_segments(q, + min(max_discard_segs, MAX_DISCARD_SEGMENTS)); + + if (discard_granularity) + q->limits.discard_granularity = discard_granularity << SECTOR_SHIFT; + else + q->limits.discard_granularity = blk_size; + } + virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); @@ -1169,6 +1241,7 @@ static unsigned int features_legacy[] = { VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, + VIRTIO_BLK_F_SECURE_ERASE, } ; static unsigned int features[] = { @@ -1176,6 +1249,7 @@ static unsigned int features[] = { VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, + VIRTIO_BLK_F_SECURE_ERASE, }; static struct virtio_driver virtio_blk = { diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index d888f013d9ff..58e70b24b504 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -40,6 +40,7 @@ #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ +#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ /* Legacy feature bits */ #ifndef VIRTIO_BLK_NO_LEGACY @@ -121,6 +122,21 @@ struct virtio_blk_config { __u8 write_zeroes_may_unmap; __u8 unused1[3]; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */ + /* + * The maximum secure erase sectors (in 512-byte sectors) for + * one segment. + */ + __virtio32 max_secure_erase_sectors; + /* + * The maximum number of secure erase segments in a + * secure erase command. + */ + __virtio32 max_secure_erase_seg; + /* Secure erase commands must be aligned to this number of sectors. */ + __virtio32 secure_erase_sector_alignment; + } __attribute__((packed)); /* @@ -155,6 +171,9 @@ struct virtio_blk_config { /* Write zeroes command */ #define VIRTIO_BLK_T_WRITE_ZEROES 13 +/* Secure erase command */ +#define VIRTIO_BLK_T_SECURE_ERASE 14 + #ifndef VIRTIO_BLK_NO_LEGACY /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 From 228565100def593df0f26ee07d5fb810039454d5 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 29 Sep 2022 09:45:50 +0800 Subject: [PATCH 11/17] vDPA: allow userspace to query features of a vDPA device This commit adds a new vDPA netlink attribution VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES. Userspace can query features of vDPA devices through this new attr. This commit invokes vdpa_config_ops.get_config() rather than vdpa_get_config_unlocked() to read the device config spcae, so no races in vdpa_set_features_unlocked() Userspace tool iproute2 example: $ vdpa dev config show vdpa0 vdpa0: mac 00:e8:ca:11:be:05 link up link_announce false max_vq_pairs 4 mtu 1500 negotiated_features MRG_RXBUF CTRL_VQ MQ VERSION_1 ACCESS_PLATFORM dev_features MTU MAC MRG_RXBUF CTRL_VQ MQ ANY_LAYOUT VERSION_1 ACCESS_PLATFORM Signed-off-by: Zhu Lingshan Message-Id: <20220929014555.112323-2-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 16 +++++++++++----- include/uapi/linux/vdpa.h | 4 ++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 278e26bfa492..d0a7b76d9163 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -820,10 +820,10 @@ static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev, static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) { struct virtio_net_config config = {}; - u64 features; + u64 features_device, features_driver; u16 val_u16; - vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config)); + vdev->config->get_config(vdev, 0, &config, sizeof(config)); if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac), config.mac)) @@ -837,12 +837,18 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16)) return -EMSGSIZE; - features = vdev->config->get_driver_features(vdev); - if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features, + features_driver = vdev->config->get_driver_features(vdev); + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features_driver, VDPA_ATTR_PAD)) return -EMSGSIZE; - return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config); + features_device = vdev->config->get_device_features(vdev); + + if (nla_put_u64_64bit(msg, VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, features_device, + VDPA_ATTR_PAD)) + return -EMSGSIZE; + + return vdpa_dev_net_mq_config_fill(vdev, msg, features_driver, &config); } static int diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 9dc855f37c59..9bd79235c875 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -46,6 +46,7 @@ enum vdpa_attr { VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */ VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, /* u32 */ + /* virtio features that are supported by the vDPA management device */ VDPA_ATTR_DEV_SUPPORTED_FEATURES, /* u64 */ VDPA_ATTR_DEV_QUEUE_INDEX, /* u32 */ @@ -54,6 +55,9 @@ enum vdpa_attr { VDPA_ATTR_DEV_FEATURES, /* u64 */ + /* virtio features that are supported by the vDPA device */ + VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, /* u64 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; From c6dac2ecfa36a73ffec65a869f34274f08ccbd4f Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 29 Sep 2022 09:45:51 +0800 Subject: [PATCH 12/17] vDPA: only report driver features if FEATURES_OK is set This commit reports driver features to user space only after FEATURES_OK is features negotiation is done. Signed-off-by: Zhu Lingshan Message-Id: <20220929014555.112323-3-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index d0a7b76d9163..1bed5274c542 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -820,7 +820,7 @@ static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev, static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) { struct virtio_net_config config = {}; - u64 features_device, features_driver; + u64 features_device; u16 val_u16; vdev->config->get_config(vdev, 0, &config, sizeof(config)); @@ -837,11 +837,6 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16)) return -EMSGSIZE; - features_driver = vdev->config->get_driver_features(vdev); - if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features_driver, - VDPA_ATTR_PAD)) - return -EMSGSIZE; - features_device = vdev->config->get_device_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, features_device, @@ -855,6 +850,8 @@ static int vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { + u64 features_driver; + u8 status = 0; u32 device_id; void *hdr; int err; @@ -878,6 +875,17 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, goto msg_err; } + /* only read driver features after the feature negotiation is done */ + status = vdev->config->get_status(vdev); + if (status & VIRTIO_CONFIG_S_FEATURES_OK) { + features_driver = vdev->config->get_driver_features(vdev); + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features_driver, + VDPA_ATTR_PAD)) { + err = -EMSGSIZE; + goto msg_err; + } + } + switch (device_id) { case VIRTIO_ID_NET: err = vdpa_dev_net_config_fill(vdev, msg); From 8a505711fa2749a43bc3208f0c75f57b6491fec4 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 29 Sep 2022 09:45:52 +0800 Subject: [PATCH 13/17] vDPA: check VIRTIO_NET_F_RSS for max_virtqueue_paris's presence virtio 1.2 spec says: max_virtqueue_pairs only exists if VIRTIO_NET_F_MQ or VIRTIO_NET_F_RSS is set. So when reporint MQ to userspace, it should check both VIRTIO_NET_F_MQ and VIRTIO_NET_F_RSS. unused parameter struct vdpa_device *vdev is removed Signed-off-by: Zhu Lingshan Acked-by: Jason Wang Message-Id: <20220929014555.112323-4-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 1bed5274c542..98f4c3173935 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -804,13 +804,13 @@ static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callba return msg->len; } -static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev, - struct sk_buff *msg, u64 features, +static int vdpa_dev_net_mq_config_fill(struct sk_buff *msg, u64 features, const struct virtio_net_config *config) { u16 val_u16; - if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0) + if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0 && + (features & BIT_ULL(VIRTIO_NET_F_RSS)) == 0) return 0; val_u16 = le16_to_cpu(config->max_virtqueue_pairs); From 9d97aa124cde0264587b034bc34438acc2b44ecf Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 29 Sep 2022 09:45:53 +0800 Subject: [PATCH 14/17] vDPA: check virtio device features to detect MQ vdpa_dev_net_mq_config_fill() should checks device features for MQ than driver features. Signed-off-by: Zhu Lingshan Acked-by: Jason Wang Message-Id: <20220929014555.112323-5-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 98f4c3173935..2774e071991c 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -843,7 +843,7 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms VDPA_ATTR_PAD)) return -EMSGSIZE; - return vdpa_dev_net_mq_config_fill(vdev, msg, features_driver, &config); + return vdpa_dev_net_mq_config_fill(msg, features_device, &config); } static int From 35b37c33eb75f532606e8d333a4eb63d4c22401b Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 29 Sep 2022 09:45:54 +0800 Subject: [PATCH 15/17] vDPA: fix spars cast warning in vdpa_dev_net_mq_config_fill This commit fixes spars warnings: cast to restricted __le16 in function vdpa_dev_net_mq_config_fill() Signed-off-by: Zhu Lingshan Acked-by: Jason Wang Message-Id: <20220929014555.112323-6-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 2774e071991c..e5e18124ed12 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -813,7 +813,8 @@ static int vdpa_dev_net_mq_config_fill(struct sk_buff *msg, u64 features, (features & BIT_ULL(VIRTIO_NET_F_RSS)) == 0) return 0; - val_u16 = le16_to_cpu(config->max_virtqueue_pairs); + val_u16 = __virtio16_to_cpu(true, config->max_virtqueue_pairs); + return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16); } From 41a2ad927aa23ef7a2cbf26a1f77fb8ee421ca30 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 29 Sep 2022 09:45:55 +0800 Subject: [PATCH 16/17] vDPA: conditionally read MTU and MAC in dev cfg space The spec says: mtu only exists if VIRTIO_NET_F_MTU is set The mac address field always exists (though is only valid if VIRTIO_NET_F_MAC is set) So vdpa_dev_net_config_fill() should read MTU and MAC conditionally on the feature bits. Signed-off-by: Zhu Lingshan Acked-by: Jason Wang Message-Id: <20220929014555.112323-7-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index e5e18124ed12..d4b05cd0f2fb 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -818,6 +818,29 @@ static int vdpa_dev_net_mq_config_fill(struct sk_buff *msg, u64 features, return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16); } +static int vdpa_dev_net_mtu_config_fill(struct sk_buff *msg, u64 features, + const struct virtio_net_config *config) +{ + u16 val_u16; + + if ((features & BIT_ULL(VIRTIO_NET_F_MTU)) == 0) + return 0; + + val_u16 = __virtio16_to_cpu(true, config->mtu); + + return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16); +} + +static int vdpa_dev_net_mac_config_fill(struct sk_buff *msg, u64 features, + const struct virtio_net_config *config) +{ + if ((features & BIT_ULL(VIRTIO_NET_F_MAC)) == 0) + return 0; + + return nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, + sizeof(config->mac), config->mac); +} + static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) { struct virtio_net_config config = {}; @@ -826,24 +849,22 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms vdev->config->get_config(vdev, 0, &config, sizeof(config)); - if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac), - config.mac)) - return -EMSGSIZE; - val_u16 = __virtio16_to_cpu(true, config.status); if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16)) return -EMSGSIZE; - val_u16 = __virtio16_to_cpu(true, config.mtu); - if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16)) - return -EMSGSIZE; - features_device = vdev->config->get_device_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, features_device, VDPA_ATTR_PAD)) return -EMSGSIZE; + if (vdpa_dev_net_mtu_config_fill(msg, features_device, &config)) + return -EMSGSIZE; + + if (vdpa_dev_net_mac_config_fill(msg, features_device, &config)) + return -EMSGSIZE; + return vdpa_dev_net_mq_config_fill(msg, features_device, &config); } From 71491c54eafa318fdd24a1f26a1c82b28e1ac21d Mon Sep 17 00:00:00 2001 From: Angus Chen Date: Fri, 30 Sep 2022 08:09:15 +0800 Subject: [PATCH 17/17] virtio_pci: don't try to use intxif pin is zero The background is that we use dpu in cloud computing,the arch is x86,80 cores. We will have a lots of virtio devices,like 512 or more. When we probe about 200 virtio_blk devices,it will fail and the stack is printed as follows: [25338.485128] virtio-pci 0000:b3:00.0: virtio_pci: leaving for legacy driver [25338.496174] genirq: Flags mismatch irq 0. 00000080 (virtio418) vs. 00015a00 (timer) [25338.503822] CPU: 20 PID: 5431 Comm: kworker/20:0 Kdump: loaded Tainted: G OE --------- - - 4.18.0-305.30.1.el8.x86_64 [25338.516403] Hardware name: Inspur NF5280M5/YZMB-00882-10E, BIOS 4.1.21 08/25/2021 [25338.523881] Workqueue: events work_for_cpu_fn [25338.528235] Call Trace: [25338.530687] dump_stack+0x5c/0x80 [25338.534000] __setup_irq.cold.53+0x7c/0xd3 [25338.538098] request_threaded_irq+0xf5/0x160 [25338.542371] vp_find_vqs+0xc7/0x190 [25338.545866] init_vq+0x17c/0x2e0 [virtio_blk] [25338.550223] ? ncpus_cmp_func+0x10/0x10 [25338.554061] virtblk_probe+0xe6/0x8a0 [virtio_blk] [25338.558846] virtio_dev_probe+0x158/0x1f0 [25338.562861] really_probe+0x255/0x4a0 [25338.566524] ? __driver_attach_async_helper+0x90/0x90 [25338.571567] driver_probe_device+0x49/0xc0 [25338.575660] bus_for_each_drv+0x79/0xc0 [25338.579499] __device_attach+0xdc/0x160 [25338.583337] bus_probe_device+0x9d/0xb0 [25338.587167] device_add+0x418/0x780 [25338.590654] register_virtio_device+0x9e/0xe0 [25338.595011] virtio_pci_probe+0xb3/0x140 [25338.598941] local_pci_probe+0x41/0x90 [25338.602689] work_for_cpu_fn+0x16/0x20 [25338.606443] process_one_work+0x1a7/0x360 [25338.610456] ? create_worker+0x1a0/0x1a0 [25338.614381] worker_thread+0x1cf/0x390 [25338.618132] ? create_worker+0x1a0/0x1a0 [25338.622051] kthread+0x116/0x130 [25338.625283] ? kthread_flush_work_fn+0x10/0x10 [25338.629731] ret_from_fork+0x1f/0x40 [25338.633395] virtio_blk: probe of virtio418 failed with error -16 The log : "genirq: Flags mismatch irq 0. 00000080 (virtio418) vs. 00015a00 (timer)" was printed because of the irq 0 is used by timer exclusive,and when vp_find_vqs call vp_find_vqs_msix and returns false twice (for whatever reason), then it will call vp_find_vqs_intx as a fallback. Because vp_dev->pci_dev->irq is zero, we request irq 0 with flag IRQF_SHARED, and get a backtrace like above. According to PCI spec about "Interrupt Pin" Register (Offset 3Dh): "The Interrupt Pin register is a read-only register that identifies the legacy interrupt Message(s) the Function uses. Valid values are 01h, 02h, 03h, and 04h that map to legacy interrupt Messages for INTA, INTB, INTC, and INTD respectively. A value of 00h indicates that the Function uses no legacy interrupt Message(s)." So if vp_dev->pci_dev->pin is zero, we should not request legacy interrupt. Signed-off-by: Angus Chen Suggested-by: Michael S. Tsirkin Message-Id: <20220930000915.548-1-angus.chen@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index ad258a9d3b9f..4df77eeb4d16 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -409,6 +409,9 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc); if (!err) return 0; + /* Is there an interrupt pin? If not give up. */ + if (!(to_vp_device(vdev)->pci_dev->pin)) + return err; /* Finally fall back to regular interrupts. */ return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx); }