RDMA/ipoib: Remove racy Subnet Manager sendonly join checks

When a system receives a REREG event from the SM, then the SM information
in the kernel is marked as invalid and a request is sent to the SM to
update the information. The SM information is invalid in that time period.

However, the REREG event is also delivered simultaneously to user space
applications, which then try to rejoin their multicast groups. Some of
those may be sendonly multicast groups, and those joins then fail.

If the SM information is invalid then ib_sa_sendonly_fullmem_support()
returns false. That is wrong because it just means that we do not know yet
if the potentially new SM supports sendonly joins.

Sendonly join was introduced in 2015 and all the Subnet managers have
supported it ever since. So there is no point in checking if a subnet
manager supports it.

Should an old opensm get a request for a sendonly join then the request
will fail. The code that is removed here accommodated that situation and
fell back to a full join.

Falling back to a full join is problematic in itself. The reason to use
the sendonly join was to reduce the traffic on the InfiniBand fabric;
otherwise one could have just stayed with the regular join. So this patch
may cause users of very old opensms to discover that lots of traffic
needlessly crosses their IB fabrics.

Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2101281845160.13303@www.lameter.com
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Christoph Lameter 2021-01-28 18:46:47 +00:00 коммит произвёл Jason Gunthorpe
Родитель c70f51de85
Коммит 633d610212
6 изменённых файлов: 1 добавлений и 54 удалений

Просмотреть файл

@ -4542,17 +4542,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state; rec.join_state = mc->join_state;
if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
(!ib_sa_sendonly_fullmem_support(&sa_client,
id_priv->id.device,
id_priv->id.port_num))) {
dev_warn(
&id_priv->id.device->dev,
"RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
id_priv->id.port_num);
return -EOPNOTSUPP;
}
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |

Просмотреть файл

@ -1951,30 +1951,6 @@ err1:
} }
EXPORT_SYMBOL(ib_sa_guid_info_rec_query); EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
struct ib_device *device,
u8 port_num)
{
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
bool ret = false;
unsigned long flags;
if (!sa_dev)
return ret;
port = &sa_dev->port[port_num - sa_dev->start_port];
spin_lock_irqsave(&port->classport_lock, flags);
if ((port->classport_info.valid) &&
(port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB))
ret = ib_get_cpi_capmask2(&port->classport_info.data.ib)
& IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT;
spin_unlock_irqrestore(&port->classport_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support);
struct ib_classport_info_context { struct ib_classport_info_context {
struct completion done; struct completion done;
struct ib_sa_query *sa_query; struct ib_sa_query *sa_query;

Просмотреть файл

@ -413,7 +413,6 @@ struct ipoib_dev_priv {
u64 hca_caps; u64 hca_caps;
struct ipoib_ethtool_st ethtool; struct ipoib_ethtool_st ethtool;
unsigned int max_send_sge; unsigned int max_send_sge;
bool sm_fullmember_sendonly_support;
const struct net_device_ops *rn_ops; const struct net_device_ops *rn_ops;
}; };

Просмотреть файл

@ -141,8 +141,6 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
priv->sm_fullmember_sendonly_support = false;
if (ipoib_ib_dev_open(dev)) { if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0; return 0;

Просмотреть файл

@ -333,15 +333,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
return; return;
} }
/*
* Check if can send sendonly MCG's with sendonly-fullmember join state.
* It done here after the successfully join to the broadcast group,
* because the broadcast group must always be joined first and is always
* re-joined if the SM changes substantially.
*/
priv->sm_fullmember_sendonly_support =
ib_sa_sendonly_fullmem_support(&ipoib_sa_client,
priv->ca, priv->port);
/* /*
* Take rtnl_lock to avoid racing with ipoib_stop() and * Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being * turning the carrier back on while a device is being
@ -537,9 +528,7 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
* most closely emulates the behavior, from a user space * most closely emulates the behavior, from a user space
* application perspective, of Ethernet multicast operation. * application perspective, of Ethernet multicast operation.
*/ */
if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
priv->sm_fullmember_sendonly_support)
/* SM supports sendonly-fullmember, otherwise fallback to full-member */
rec.join_state = SENDONLY_FULLMEMBER_JOIN; rec.join_state = SENDONLY_FULLMEMBER_JOIN;
} }
spin_unlock_irq(&priv->lock); spin_unlock_irq(&priv->lock);

Просмотреть файл

@ -547,10 +547,6 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
void *context), void *context),
void *context, struct ib_sa_query **sa_query); void *context, struct ib_sa_query **sa_query);
bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
struct ib_device *device,
u8 port_num);
static inline bool sa_path_is_roce(struct sa_path_rec *rec) static inline bool sa_path_is_roce(struct sa_path_rec *rec)
{ {
return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) || return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) ||