From patchwork Wed Jan 17 13:49:25 2018
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: [dpdk-dev,
 v5] vhost_user: protect active rings from async ring changes
From: Victor Kaplansky <victork@redhat.com>
X-Patchwork-Id: 33921
X-Patchwork-Delegate: yuanhan.liu@linux.intel.com
List-Id: dev.dpdk.org
To: dev@dpdk.org
Cc: stable@dpdk.org, Jens Freimann <jfreiman@redhat.com>,
 Maxime Coquelin <maxime.coquelin@redhat.com>,
 Yuanhan Liu <yliu@fridaylinux.org>, Tiwei Bie <tiwei.bie@intel.com>,
 "Tan, Jianfeng" <jianfeng.tan@intel.com>,
 Stephen Hemminger <stephen@networkplumber.org>,
 Victor Kaplansky <victork@redhat.com>
Date: Wed, 17 Jan 2018 15:49:25 +0200

When performing live migration or memory hot-plugging,
the changes to the device and vrings made by the message handler
are done independently of vring usage by the PMD threads.

This causes, for example, segfaults during live migration
with MQ enabled, but in general virtually any request
sent by QEMU that changes the device state can cause
problems.

This patch fixes all of the above issues by adding a spinlock
to every vring and requiring the message handler to start an
operation only after ensuring that all PMD threads related to the
device are out of the critical section accessing the vring data.
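
To illustrate the idea, here is a minimal sketch of the control-path
side of this handshake (condensed from vhost_user.c in the diff below;
handle_msg() is a hypothetical stand-in for the actual message
processing, not a function in this patch):

	/* Quiesce the datapath: take every vring's access_lock before
	 * changing device state, then release them all afterwards.
	 * PMD threads hold the lock only around ring processing, so
	 * once all locks are held, no ring is in active use.
	 */
	static void
	process_state_changing_msg(struct virtio_net *dev, VhostUserMsg *msg)
	{
		unsigned int i, vq_num;

		/* virtqueue[] may be sparse, so count locked vrings. */
		for (i = 0, vq_num = 0; vq_num < dev->nr_vring; i++) {
			struct vhost_virtqueue *vq = dev->virtqueue[i];

			if (vq) {
				rte_spinlock_lock(&vq->access_lock);
				vq_num++;
			}
		}

		handle_msg(dev, msg);	/* safe: no PMD thread is inside a ring */

		for (i = 0, vq_num = 0; vq_num < dev->nr_vring; i++) {
			struct vhost_virtqueue *vq = dev->virtqueue[i];

			if (vq) {
				rte_spinlock_unlock(&vq->access_lock);
				vq_num++;
			}
		}
	}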

Each vring has its own lock in order to avoid contention
between PMD threads working on different vrings and to prevent
performance degradation as the number of queue pairs scales.
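
On the data path, each PMD thread takes only the lock of the vring it
is processing. For example, a sketch of the dequeue side (condensed
from rte_vhost_dequeue_burst() in the diff below; do_dequeue() is a
hypothetical stand-in for the actual ring processing):

	static uint16_t
	dequeue_sketch(struct vhost_virtqueue *vq,
		       struct rte_mbuf **pkts, uint16_t count)
	{
		uint16_t n = 0;

		/* trylock, not lock: the dequeue path must not block
		 * behind the message handler; returning zero packets
		 * lets the caller simply poll again later. */
		if (rte_spinlock_trylock(&vq->access_lock) == 0)
			return 0;

		if (unlikely(vq->enabled == 0))
			goto out;

		n = do_dequeue(vq, pkts, count);

	out:
		rte_spinlock_unlock(&vq->access_lock);
		return n;
	}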

See https://bugzilla.redhat.com/show_bug.cgi?id=1450680

Signed-off-by: Victor Kaplansky <victork@redhat.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
v5:
 o get rid of the spinlock-wrapping functions in vhost.h

v4:
 o moved access_lock before accessing the enable flag and
   access_unlock after iommu_unlock, consistently.
 o cosmetics: removed a blank line.
 o moved the access_lock variable into the same
   cache line as the enable and access_ok flags.
 o the dequeue path is now guarded with a trylock, returning
   zero if unsuccessful.
 o the GET_VRING_BASE operation is not guarded by the access lock,
   to avoid a deadlock with device_destroy; see the comment
   in the code.
 o fixed the error-path exits from enqueue and dequeue to carefully
   unlock the access and IOMMU locks as appropriate (see the sketch
   after this list).
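
For reference, a minimal sketch of the lock nesting that the error
paths must preserve (mirroring virtio_dev_rx() in the diff below;
do_enqueue() is a hypothetical stand-in for the actual ring
processing):

	static uint32_t
	enqueue_sketch(struct virtio_net *dev, struct vhost_virtqueue *vq)
	{
		uint32_t count = 0;

		rte_spinlock_lock(&vq->access_lock);

		/* An error exit taken before the IOTLB lock is acquired
		 * must skip the IOTLB unlock. */
		if (unlikely(vq->enabled == 0))
			goto out_access_unlock;

		if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
			vhost_user_iotlb_rd_lock(vq);

		count = do_enqueue(dev, vq);

		/* Unlock in reverse order: the inner IOTLB lock first... */
		if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
			vhost_user_iotlb_rd_unlock(vq);

	out_access_unlock:
		/* ...and the outer access_lock last. */
		rte_spinlock_unlock(&vq->access_lock);
		return count;
	}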

v3:
   o Added locking to the enqueue flow.
   o The enqueue path is now guarded as well as the dequeue path.
   o Changed the name of active_lock.
   o Added initialization of the guarding spinlock.
   o Reworked the functions iterating over all virtqueues.
   o Performance measurements done by Maxime Coquelin show
     no degradation in bandwidth or throughput.
   o Spelling.
   o Taking the lock only on set operations.
   o IOMMU messages are not guarded by the access lock.

v2:
   o Fixed checkpatch complaints.
   o Added Signed-off-by.
   o Refined placement of the guard to exclude IOMMU messages.
   o TODO: performance degradation measurement.

 dpdk-17.11/lib/librte_vhost/vhost.h      |  6 ++--
 dpdk-17.11/lib/librte_vhost/vhost.c      |  1 +
 dpdk-17.11/lib/librte_vhost/vhost_user.c | 70 ++++++++++++++++++++++++++++++++
 dpdk-17.11/lib/librte_vhost/virtio_net.c | 28 ++++++++++++++---
 4 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/dpdk-17.11/lib/librte_vhost/vhost.h b/dpdk-17.11/lib/librte_vhost/vhost.h
index 1cc81c17..c8f2a817 100644
--- a/dpdk-17.11/lib/librte_vhost/vhost.h
+++ b/dpdk-17.11/lib/librte_vhost/vhost.h
@@ -108,12 +108,14 @@ struct vhost_virtqueue {
 
 	/* Backend value to determine if device should started/stopped */
 	int			backend;
+	int			enabled;
+	int			access_ok;
+	rte_spinlock_t		access_lock;
+
 	/* Used to notify the guest (trigger interrupt) */
 	int			callfd;
 	/* Currently unused as polling mode is enabled */
 	int			kickfd;
-	int			enabled;
-	int			access_ok;
 
 	/* Physical address of used ring, for logging */
 	uint64_t		log_guest_addr;
diff --git a/dpdk-17.11/lib/librte_vhost/vhost.c b/dpdk-17.11/lib/librte_vhost/vhost.c
index 4f8b73a0..dcc42fc7 100644
--- a/dpdk-17.11/lib/librte_vhost/vhost.c
+++ b/dpdk-17.11/lib/librte_vhost/vhost.c
@@ -259,6 +259,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 
 	dev->virtqueue[vring_idx] = vq;
 	init_vring_queue(dev, vring_idx);
+	rte_spinlock_init(&vq->access_lock);
 
 	dev->nr_vring += 1;
 
diff --git a/dpdk-17.11/lib/librte_vhost/vhost_user.c b/dpdk-17.11/lib/librte_vhost/vhost_user.c
index f4c7ce46..0685d4e7 100644
--- a/dpdk-17.11/lib/librte_vhost/vhost_user.c
+++ b/dpdk-17.11/lib/librte_vhost/vhost_user.c
@@ -1190,12 +1190,47 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg)
 	return alloc_vring_queue(dev, vring_idx);
 }
 
+static void
+vhost_user_lock_all_queue_pairs(struct virtio_net *dev)
+{
+	unsigned int i = 0;
+	unsigned int vq_num = 0;
+
+	while (vq_num < dev->nr_vring) {
+		struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+		if (vq) {
+			rte_spinlock_lock(&vq->access_lock);
+			vq_num++;
+		}
+		i++;
+	}
+}
+
+static void
+vhost_user_unlock_all_queue_pairs(struct virtio_net *dev)
+{
+	unsigned int i = 0;
+	unsigned int vq_num = 0;
+
+	while (vq_num < dev->nr_vring) {
+		struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+		if (vq) {
+			rte_spinlock_unlock(&vq->access_lock);
+			vq_num++;
+		}
+		i++;
+	}
+}
+
 int
 vhost_user_msg_handler(int vid, int fd)
 {
 	struct virtio_net *dev;
 	struct VhostUserMsg msg;
 	int ret;
+	int unlock_required = 0;
 
 	dev = get_device(vid);
 	if (dev == NULL)
@@ -1241,6 +1276,38 @@ vhost_user_msg_handler(int vid, int fd)
 		return -1;
 	}
 
+	/*
+	 * Note: we don't lock all queues on VHOST_USER_GET_VRING_BASE,
+	 * since it is sent when virtio stops and the device is destroyed.
+	 * destroy_device waits for queues to be inactive, so it is safe.
+	 * Otherwise taking the access_lock would cause a deadlock.
+	 */
+	switch (msg.request.master) {
+	case VHOST_USER_SET_FEATURES:
+	case VHOST_USER_SET_PROTOCOL_FEATURES:
+	case VHOST_USER_SET_OWNER:
+	case VHOST_USER_RESET_OWNER:
+	case VHOST_USER_SET_MEM_TABLE:
+	case VHOST_USER_SET_LOG_BASE:
+	case VHOST_USER_SET_LOG_FD:
+	case VHOST_USER_SET_VRING_NUM:
+	case VHOST_USER_SET_VRING_ADDR:
+	case VHOST_USER_SET_VRING_BASE:
+	case VHOST_USER_SET_VRING_KICK:
+	case VHOST_USER_SET_VRING_CALL:
+	case VHOST_USER_SET_VRING_ERR:
+	case VHOST_USER_SET_VRING_ENABLE:
+	case VHOST_USER_SEND_RARP:
+	case VHOST_USER_NET_SET_MTU:
+	case VHOST_USER_SET_SLAVE_REQ_FD:
+		vhost_user_lock_all_queue_pairs(dev);
+		unlock_required = 1;
+		break;
+	default:
+		break;
+
+	}
+
 	switch (msg.request.master) {
 	case VHOST_USER_GET_FEATURES:
 		msg.payload.u64 = vhost_user_get_features(dev);
@@ -1342,6 +1409,9 @@ vhost_user_msg_handler(int vid, int fd)
 
 	}
 
+	if (unlock_required)
+		vhost_user_unlock_all_queue_pairs(dev);
+
 	if (msg.flags & VHOST_USER_NEED_REPLY) {
 		msg.payload.u64 = !!ret;
 		msg.size = sizeof(msg.payload.u64);
diff --git a/dpdk-17.11/lib/librte_vhost/virtio_net.c b/dpdk-17.11/lib/librte_vhost/virtio_net.c
index 6fee16e5..e09a927d 100644
--- a/dpdk-17.11/lib/librte_vhost/virtio_net.c
+++ b/dpdk-17.11/lib/librte_vhost/virtio_net.c
@@ -44,6 +44,7 @@
 #include <rte_udp.h>
 #include <rte_sctp.h>
 #include <rte_arp.h>
+#include <rte_spinlock.h>
 
 #include "iotlb.h"
 #include "vhost.h"
@@ -326,8 +327,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	}
 
 	vq = dev->virtqueue[queue_id];
+
+	rte_spinlock_lock(&vq->access_lock);
+
 	if (unlikely(vq->enabled == 0))
-		return 0;
+		goto out_access_unlock;
 
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 		vhost_user_iotlb_rd_lock(vq);
@@ -419,6 +423,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 		vhost_user_iotlb_rd_unlock(vq);
 
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
 	return count;
 }
 
@@ -651,8 +658,11 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	}
 
 	vq = dev->virtqueue[queue_id];
+
+	rte_spinlock_lock(&vq->access_lock);
+
 	if (unlikely(vq->enabled == 0))
-		return 0;
+		goto out_access_unlock;
 
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 		vhost_user_iotlb_rd_lock(vq);
@@ -715,6 +725,9 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 		vhost_user_iotlb_rd_unlock(vq);
 
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
 	return pkt_idx;
 }
 
@@ -1180,9 +1193,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	}
 
 	vq = dev->virtqueue[queue_id];
-	if (unlikely(vq->enabled == 0))
+
+	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
 		return 0;
 
+	if (unlikely(vq->enabled == 0))
+		goto out_access_unlock;
+
 	vq->batch_copy_nb_elems = 0;
 
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
@@ -1240,7 +1257,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		if (rarp_mbuf == NULL) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
-			return 0;
+			goto out;
 		}
 
 		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
@@ -1356,6 +1373,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 		vhost_user_iotlb_rd_unlock(vq);
 
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
 	if (unlikely(rarp_mbuf != NULL)) {
 		/*
 		 * Inject it to the head of "pkts" array, so that switch's mac