RDMA/rxe: Use acquire/release for memory ordering
Change work and completion queues to use smp_load_acquire() and smp_store_release() to synchronize between driver and users. This commit goes with a matching series of commits in the rxe user space provider. Link: https://lore.kernel.org/r/20201210174258.5234-1-rpearson@hpe.com Signed-off-by: Bob Pearson <rpearson@hpe.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Родитель
d8cc403b70
Коммит
d21a1240f5
|
@ -123,11 +123,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
|
|||
|
||||
memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));
|
||||
|
||||
/* make sure all changes to the CQ are written before we update the
|
||||
* producer pointer
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
advance_producer(cq->queue);
|
||||
spin_unlock_irqrestore(&cq->cq_lock, flags);
|
||||
|
||||
|
|
|
@ -7,9 +7,11 @@
|
|||
#ifndef RXE_QUEUE_H
|
||||
#define RXE_QUEUE_H
|
||||
|
||||
/* for definition of shared struct rxe_queue_buf */
|
||||
#include <uapi/rdma/rdma_user_rxe.h>
|
||||
|
||||
/* implements a simple circular buffer that can optionally be
|
||||
* shared between user space and the kernel and can be resized
|
||||
|
||||
* the requested element size is rounded up to a power of 2
|
||||
* and the number of elements in the buffer is also rounded
|
||||
* up to a power of 2. Since the queue is empty when the
|
||||
|
@ -17,28 +19,6 @@
|
|||
* of the queue is one less than the number of element slots
|
||||
*/
|
||||
|
||||
/* this data structure is shared between user space and kernel
|
||||
* space for those cases where the queue is shared. It contains
|
||||
* the producer and consumer indices. Is also contains a copy
|
||||
* of the queue size parameters for user space to use but the
|
||||
* kernel must use the parameters in the rxe_queue struct
|
||||
* this MUST MATCH the corresponding librxe struct
|
||||
* for performance reasons arrange to have producer and consumer
|
||||
* pointers in separate cache lines
|
||||
* the kernel should always mask the indices to avoid accessing
|
||||
* memory outside of the data area
|
||||
*/
|
||||
struct rxe_queue_buf {
|
||||
__u32 log2_elem_size;
|
||||
__u32 index_mask;
|
||||
__u32 pad_1[30];
|
||||
__u32 producer_index;
|
||||
__u32 pad_2[31];
|
||||
__u32 consumer_index;
|
||||
__u32 pad_3[31];
|
||||
__u8 data[];
|
||||
};
|
||||
|
||||
struct rxe_queue {
|
||||
struct rxe_dev *rxe;
|
||||
struct rxe_queue_buf *buf;
|
||||
|
@ -46,7 +26,7 @@ struct rxe_queue {
|
|||
size_t buf_size;
|
||||
size_t elem_size;
|
||||
unsigned int log2_elem_size;
|
||||
unsigned int index_mask;
|
||||
u32 index_mask;
|
||||
};
|
||||
|
||||
int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
|
||||
|
@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index)
|
|||
|
||||
static inline int queue_empty(struct rxe_queue *q)
|
||||
{
|
||||
return ((q->buf->producer_index - q->buf->consumer_index)
|
||||
& q->index_mask) == 0;
|
||||
u32 prod;
|
||||
u32 cons;
|
||||
|
||||
/* make sure all changes to queue complete before
|
||||
* testing queue empty
|
||||
*/
|
||||
prod = smp_load_acquire(&q->buf->producer_index);
|
||||
/* same */
|
||||
cons = smp_load_acquire(&q->buf->consumer_index);
|
||||
|
||||
return ((prod - cons) & q->index_mask) == 0;
|
||||
}
|
||||
|
||||
static inline int queue_full(struct rxe_queue *q)
|
||||
{
|
||||
return ((q->buf->producer_index + 1 - q->buf->consumer_index)
|
||||
& q->index_mask) == 0;
|
||||
u32 prod;
|
||||
u32 cons;
|
||||
|
||||
/* make sure all changes to queue complete before
|
||||
* testing queue full
|
||||
*/
|
||||
prod = smp_load_acquire(&q->buf->producer_index);
|
||||
/* same */
|
||||
cons = smp_load_acquire(&q->buf->consumer_index);
|
||||
|
||||
return ((prod + 1 - cons) & q->index_mask) == 0;
|
||||
}
|
||||
|
||||
static inline void advance_producer(struct rxe_queue *q)
|
||||
{
|
||||
q->buf->producer_index = (q->buf->producer_index + 1)
|
||||
& q->index_mask;
|
||||
u32 prod;
|
||||
|
||||
prod = (q->buf->producer_index + 1) & q->index_mask;
|
||||
|
||||
/* make sure all changes to queue complete before
|
||||
* changing producer index
|
||||
*/
|
||||
smp_store_release(&q->buf->producer_index, prod);
|
||||
}
|
||||
|
||||
static inline void advance_consumer(struct rxe_queue *q)
|
||||
{
|
||||
q->buf->consumer_index = (q->buf->consumer_index + 1)
|
||||
& q->index_mask;
|
||||
u32 cons;
|
||||
|
||||
cons = (q->buf->consumer_index + 1) & q->index_mask;
|
||||
|
||||
/* make sure all changes to queue complete before
|
||||
* changing consumer index
|
||||
*/
|
||||
smp_store_release(&q->buf->consumer_index, cons);
|
||||
}
|
||||
|
||||
static inline void *producer_addr(struct rxe_queue *q)
|
||||
|
@ -112,12 +122,28 @@ static inline void *consumer_addr(struct rxe_queue *q)
|
|||
|
||||
static inline unsigned int producer_index(struct rxe_queue *q)
|
||||
{
|
||||
return q->buf->producer_index;
|
||||
u32 index;
|
||||
|
||||
/* make sure all changes to queue
|
||||
* complete before getting producer index
|
||||
*/
|
||||
index = smp_load_acquire(&q->buf->producer_index);
|
||||
index &= q->index_mask;
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
static inline unsigned int consumer_index(struct rxe_queue *q)
|
||||
{
|
||||
return q->buf->consumer_index;
|
||||
u32 index;
|
||||
|
||||
/* make sure all changes to queue
|
||||
* complete before getting consumer index
|
||||
*/
|
||||
index = smp_load_acquire(&q->buf->consumer_index);
|
||||
index &= q->index_mask;
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
|
||||
|
|
|
@ -244,11 +244,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
|
|||
recv_wqe->dma.cur_sge = 0;
|
||||
recv_wqe->dma.sge_offset = 0;
|
||||
|
||||
/* make sure all changes to the work queue are written before we
|
||||
* update the producer pointer
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
advance_producer(rq->queue);
|
||||
return 0;
|
||||
|
||||
|
@ -633,12 +628,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
|
|||
if (unlikely(err))
|
||||
goto err1;
|
||||
|
||||
/*
|
||||
* make sure all changes to the work queue are
|
||||
* written before we update the producer pointer
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
advance_producer(sq->queue);
|
||||
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
|
||||
|
||||
|
|
|
@ -181,4 +181,25 @@ struct rxe_modify_srq_cmd {
|
|||
__aligned_u64 mmap_info_addr;
|
||||
};
|
||||
|
||||
/* This data structure is stored at the base of work and
|
||||
* completion queues shared between user space and kernel space.
|
||||
* It contains the producer and consumer indices. Is also
|
||||
* contains a copy of the queue size parameters for user space
|
||||
* to use but the kernel must use the parameters in the
|
||||
* rxe_queue struct. For performance reasons arrange to have
|
||||
* producer and consumer indices in separate cache lines
|
||||
* the kernel should always mask the indices to avoid accessing
|
||||
* memory outside of the data area
|
||||
*/
|
||||
struct rxe_queue_buf {
|
||||
__u32 log2_elem_size;
|
||||
__u32 index_mask;
|
||||
__u32 pad_1[30];
|
||||
__u32 producer_index;
|
||||
__u32 pad_2[31];
|
||||
__u32 consumer_index;
|
||||
__u32 pad_3[31];
|
||||
__u8 data[];
|
||||
};
|
||||
|
||||
#endif /* RDMA_USER_RXE_H */
|
||||
|
|
Загрузка…
Ссылка в новой задаче