Vladimir Oltean says:

====================
Drop rtnl_lock from DSA .port_fdb_{add,del}

As mentioned in the RFC posted 2 months ago:
https://patchwork.kernel.org/project/netdevbpf/cover/20210824114049.3814660-1-vladimir.oltean@nxp.com/

DSA is transitioning to a driver API where the rtnl_lock is not held
when calling ds->ops->port_fdb_add() and ds->ops->port_fdb_del().
Drivers cannot take that lock privately from those callbacks either.
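Concretely, the expectation is that a driver serializes its own hardware
access from within these callbacks. A minimal sketch of the pattern
(hypothetical foo_* names and lock, not code from any driver in this series):

static int foo_port_fdb_add(struct dsa_switch *ds, int port,
                            const unsigned char *addr, u16 vid)
{
        struct foo_priv *priv = ds->priv;
        int err;

        /* No rtnl_lock is held here anymore; serialize hardware access
         * with the driver's own register lock instead.
         */
        mutex_lock(&priv->reg_lock);
        err = foo_fdb_write(priv, port, addr, vid, true);
        mutex_unlock(&priv->reg_lock);

        return err;
}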

This change is required so that DSA can wait for switchdev FDB work
items to finish before leaving the bridge. That change will be made in a
future patch series.
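To picture the constraint: if the deferred FDB work items still took
rtnl_lock, waiting for them from a context that already holds rtnl_lock (as
the bridge leave path does) would deadlock. A rough sketch of that future
call site, with only illustrative names:

static void foo_bridge_leave(struct dsa_port *dp)
{
        /* The caller (the bridge layer) already holds rtnl_lock here.
         * Waiting for the deferred FDB work items to finish is only
         * safe if those work items no longer take rtnl_lock
         * themselves; otherwise this wait would deadlock.
         */
        dsa_flush_workqueue();
}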

A small selftest is provided with the patch set in the hope that it will
catch any concurrency issues exposed by this series that I did not spot
through code inspection.

A status of the existing drivers:

- mv88e6xxx_port_fdb_add() and mv88e6xxx_port_fdb_del() take
  mv88e6xxx_reg_lock() so they should be safe.

- qca8k_fdb_add() and qca8k_fdb_del() take mutex_lock(&priv->reg_mutex)
  so they should be safe.

- hellcreek_fdb_add() and hellcreek_fdb_del() take mutex_lock(&hellcreek->reg_lock)
  so they should be safe.

- ksz9477_port_fdb_add() and ksz9477_port_fdb_del() take mutex_lock(&dev->alu_mutex)
  so they should be safe.

- b53_fdb_add() and b53_fdb_del() did not have locking, so I've added a
  scheme based on my own judgement there (not tested).

- felix_fdb_add() and felix_fdb_del() did not have locking, so I've added
  and tested a locking scheme there.

- mt7530_port_fdb_add() and mt7530_port_fdb_del() take
  mutex_lock(&priv->reg_mutex), so they should be safe.

- gswip_port_fdb() did not have locking, so I've added a non-expert
  locking scheme based on my own judgement (not tested).

- lan9303_alr_add_port() and lan9303_alr_del_port() take
  mutex_lock(&chip->alr_mutex) so they should be safe.

- sja1105_fdb_add() and sja1105_fdb_del() did not have locking, so I've
  added and tested a locking scheme.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 2021-10-24 13:47:45 +01:00
Parent 4d98bb0d7e edc90d1585
Commit 965e6b262f
15 changed files with 283 additions and 74 deletions

View file

@ -13056,6 +13056,7 @@ F: include/linux/dsa/
F: include/linux/platform_data/dsa.h
F: include/net/dsa.h
F: net/dsa/
F: tools/testing/selftests/drivers/net/dsa/
NETWORKING [GENERAL]
M: "David S. Miller" <davem@davemloft.net>

View file

@ -1546,6 +1546,7 @@ EXPORT_SYMBOL(b53_vlan_del);
/* Address Resolution Logic routines */
static int b53_arl_op_wait(struct b53_device *dev)
__must_hold(&dev->arl_mutex)
{
unsigned int timeout = 10;
u8 reg;
@ -1564,6 +1565,7 @@ static int b53_arl_op_wait(struct b53_device *dev)
}
static int b53_arl_rw_op(struct b53_device *dev, unsigned int op)
__must_hold(&dev->arl_mutex)
{
u8 reg;
@ -1587,6 +1589,7 @@ static int b53_arl_rw_op(struct b53_device *dev, unsigned int op)
static int b53_arl_read(struct b53_device *dev, u64 mac,
u16 vid, struct b53_arl_entry *ent, u8 *idx)
__must_hold(&dev->arl_mutex)
{
DECLARE_BITMAP(free_bins, B53_ARLTBL_MAX_BIN_ENTRIES);
unsigned int i;
@ -1632,6 +1635,7 @@ static int b53_arl_read(struct b53_device *dev, u64 mac,
static int b53_arl_op(struct b53_device *dev, int op, int port,
const unsigned char *addr, u16 vid, bool is_valid)
__must_hold(&dev->arl_mutex)
{
struct b53_arl_entry ent;
u32 fwd_entry;
@ -1709,6 +1713,7 @@ int b53_fdb_add(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid)
{
struct b53_device *priv = ds->priv;
int ret;
/* 5325 and 5365 require some more massaging, but could
* be supported eventually
@ -1716,7 +1721,11 @@ int b53_fdb_add(struct dsa_switch *ds, int port,
if (is5325(priv) || is5365(priv))
return -EOPNOTSUPP;
return b53_arl_op(priv, 0, port, addr, vid, true);
mutex_lock(&priv->arl_mutex);
ret = b53_arl_op(priv, 0, port, addr, vid, true);
mutex_unlock(&priv->arl_mutex);
return ret;
}
EXPORT_SYMBOL(b53_fdb_add);
@ -1724,12 +1733,18 @@ int b53_fdb_del(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid)
{
struct b53_device *priv = ds->priv;
int ret;
return b53_arl_op(priv, 0, port, addr, vid, false);
mutex_lock(&priv->arl_mutex);
ret = b53_arl_op(priv, 0, port, addr, vid, false);
mutex_unlock(&priv->arl_mutex);
return ret;
}
EXPORT_SYMBOL(b53_fdb_del);
static int b53_arl_search_wait(struct b53_device *dev)
__must_hold(&dev->arl_mutex)
{
unsigned int timeout = 1000;
u8 reg;
@ -1750,6 +1765,7 @@ static int b53_arl_search_wait(struct b53_device *dev)
static void b53_arl_search_rd(struct b53_device *dev, u8 idx,
struct b53_arl_entry *ent)
__must_hold(&dev->arl_mutex)
{
u64 mac_vid;
u32 fwd_entry;
@ -1782,6 +1798,8 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
int ret;
u8 reg;
mutex_lock(&priv->arl_mutex);
/* Start search operation */
reg = ARL_SRCH_STDN;
b53_write8(priv, B53_ARLIO_PAGE, B53_ARL_SRCH_CTL, reg);
@ -1789,18 +1807,18 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
do {
ret = b53_arl_search_wait(priv);
if (ret)
return ret;
break;
b53_arl_search_rd(priv, 0, &results[0]);
ret = b53_fdb_copy(port, &results[0], cb, data);
if (ret)
return ret;
break;
if (priv->num_arl_bins > 2) {
b53_arl_search_rd(priv, 1, &results[1]);
ret = b53_fdb_copy(port, &results[1], cb, data);
if (ret)
return ret;
break;
if (!results[0].is_valid && !results[1].is_valid)
break;
@ -1808,6 +1826,8 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
} while (count++ < b53_max_arl_entries(priv) / 2);
mutex_unlock(&priv->arl_mutex);
return 0;
}
EXPORT_SYMBOL(b53_fdb_dump);
@ -1816,6 +1836,7 @@ int b53_mdb_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb)
{
struct b53_device *priv = ds->priv;
int ret;
/* 5325 and 5365 require some more massaging, but could
* be supported eventually
@ -1823,7 +1844,11 @@ int b53_mdb_add(struct dsa_switch *ds, int port,
if (is5325(priv) || is5365(priv))
return -EOPNOTSUPP;
return b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, true);
mutex_lock(&priv->arl_mutex);
ret = b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, true);
mutex_unlock(&priv->arl_mutex);
return ret;
}
EXPORT_SYMBOL(b53_mdb_add);
@ -1833,7 +1858,9 @@ int b53_mdb_del(struct dsa_switch *ds, int port,
struct b53_device *priv = ds->priv;
int ret;
mutex_lock(&priv->arl_mutex);
ret = b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, false);
mutex_unlock(&priv->arl_mutex);
if (ret)
dev_err(ds->dev, "failed to delete MDB entry\n");
@ -2670,6 +2697,7 @@ struct b53_device *b53_switch_alloc(struct device *base,
mutex_init(&dev->reg_mutex);
mutex_init(&dev->stats_mutex);
mutex_init(&dev->arl_mutex);
return dev;
}

View file

@ -107,6 +107,7 @@ struct b53_device {
struct mutex reg_mutex;
struct mutex stats_mutex;
struct mutex arl_mutex;
const struct b53_io_ops *ops;
/* chip specific data */

View file

@ -276,6 +276,7 @@ struct gswip_priv {
int num_gphy_fw;
struct gswip_gphy_fw *gphy_fw;
u32 port_vlan_filter;
struct mutex pce_table_lock;
};
struct gswip_pce_table_entry {
@ -523,10 +524,14 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv,
u16 addr_mode = tbl->key_mode ? GSWIP_PCE_TBL_CTRL_OPMOD_KSRD :
GSWIP_PCE_TBL_CTRL_OPMOD_ADRD;
mutex_lock(&priv->pce_table_lock);
err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
if (err)
if (err) {
mutex_unlock(&priv->pce_table_lock);
return err;
}
gswip_switch_w(priv, tbl->index, GSWIP_PCE_TBL_ADDR);
gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK |
@ -536,8 +541,10 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv,
err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
if (err)
if (err) {
mutex_unlock(&priv->pce_table_lock);
return err;
}
for (i = 0; i < ARRAY_SIZE(tbl->key); i++)
tbl->key[i] = gswip_switch_r(priv, GSWIP_PCE_TBL_KEY(i));
@ -553,6 +560,8 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv,
tbl->valid = !!(crtl & GSWIP_PCE_TBL_CTRL_VLD);
tbl->gmap = (crtl & GSWIP_PCE_TBL_CTRL_GMAP_MASK) >> 7;
mutex_unlock(&priv->pce_table_lock);
return 0;
}
@ -565,10 +574,14 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv,
u16 addr_mode = tbl->key_mode ? GSWIP_PCE_TBL_CTRL_OPMOD_KSWR :
GSWIP_PCE_TBL_CTRL_OPMOD_ADWR;
mutex_lock(&priv->pce_table_lock);
err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
if (err)
if (err) {
mutex_unlock(&priv->pce_table_lock);
return err;
}
gswip_switch_w(priv, tbl->index, GSWIP_PCE_TBL_ADDR);
gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK |
@ -600,8 +613,12 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv,
crtl |= GSWIP_PCE_TBL_CTRL_BAS;
gswip_switch_w(priv, crtl, GSWIP_PCE_TBL_CTRL);
return gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL,
GSWIP_PCE_TBL_CTRL_BAS);
mutex_unlock(&priv->pce_table_lock);
return err;
}
/* Add the LAN port into a bridge with the CPU port by
@ -2106,6 +2123,7 @@ static int gswip_probe(struct platform_device *pdev)
priv->ds->priv = priv;
priv->ds->ops = priv->hw_info->ops;
priv->dev = dev;
mutex_init(&priv->pce_table_lock);
version = gswip_switch_r(priv, GSWIP_VERSION);
np = dev->of_node;

View file

@ -261,6 +261,8 @@ struct sja1105_private {
* the switch doesn't confuse them with one another.
*/
struct mutex mgmt_lock;
/* Serializes access to the dynamic config interface */
struct mutex dynamic_config_lock;
struct devlink_region **regions;
struct sja1105_cbs_entry *cbs;
struct mii_bus *mdio_base_t1;

View file

@ -1170,6 +1170,56 @@ const struct sja1105_dynamic_table_ops sja1110_dyn_ops[BLK_IDX_MAX_DYN] = {
},
};
#define SJA1105_DYNAMIC_CONFIG_SLEEP_US 10
#define SJA1105_DYNAMIC_CONFIG_TIMEOUT_US 100000
static int
sja1105_dynamic_config_poll_valid(struct sja1105_private *priv,
struct sja1105_dyn_cmd *cmd,
const struct sja1105_dynamic_table_ops *ops)
{
u8 packed_buf[SJA1105_MAX_DYN_CMD_SIZE] = {};
int rc;
/* We don't _need_ to read the full entry, just the command area which
* is a fixed SJA1105_SIZE_DYN_CMD. But our cmd_packing() API expects a
* buffer that contains the full entry too. Additionally, our API
* doesn't really know how many bytes into the buffer does the command
* area really begin. So just read back the whole entry.
*/
rc = sja1105_xfer_buf(priv, SPI_READ, ops->addr, packed_buf,
ops->packed_size);
if (rc)
return rc;
/* Unpack the command structure, and return it to the caller in case it
* needs to perform further checks on it (VALIDENT).
*/
memset(cmd, 0, sizeof(*cmd));
ops->cmd_packing(packed_buf, cmd, UNPACK);
/* Hardware hasn't cleared VALID => still working on it */
return cmd->valid ? -EAGAIN : 0;
}
/* Poll the dynamic config entry's control area until the hardware has
* cleared the VALID bit, which means we have confirmation that it has
* finished processing the command.
*/
static int
sja1105_dynamic_config_wait_complete(struct sja1105_private *priv,
struct sja1105_dyn_cmd *cmd,
const struct sja1105_dynamic_table_ops *ops)
{
int rc;
return read_poll_timeout(sja1105_dynamic_config_poll_valid,
rc, rc != -EAGAIN,
SJA1105_DYNAMIC_CONFIG_SLEEP_US,
SJA1105_DYNAMIC_CONFIG_TIMEOUT_US,
false, priv, cmd, ops);
}
/* Provides read access to the settings through the dynamic interface
* of the switch.
* @blk_idx is used as key to select from the sja1105_dynamic_table_ops.
@ -1196,7 +1246,6 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv,
struct sja1105_dyn_cmd cmd = {0};
/* SPI payload buffer */
u8 packed_buf[SJA1105_MAX_DYN_CMD_SIZE] = {0};
int retries = 3;
int rc;
if (blk_idx >= BLK_IDX_MAX_DYN)
@ -1234,33 +1283,21 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv,
ops->entry_packing(packed_buf, entry, PACK);
/* Send SPI write operation: read config table entry */
mutex_lock(&priv->dynamic_config_lock);
rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf,
ops->packed_size);
if (rc < 0) {
mutex_unlock(&priv->dynamic_config_lock);
return rc;
}
rc = sja1105_dynamic_config_wait_complete(priv, &cmd, ops);
mutex_unlock(&priv->dynamic_config_lock);
if (rc < 0)
return rc;
/* Loop until we have confirmation that hardware has finished
* processing the command and has cleared the VALID field
*/
do {
memset(packed_buf, 0, ops->packed_size);
/* Retrieve the read operation's result */
rc = sja1105_xfer_buf(priv, SPI_READ, ops->addr, packed_buf,
ops->packed_size);
if (rc < 0)
return rc;
cmd = (struct sja1105_dyn_cmd) {0};
ops->cmd_packing(packed_buf, &cmd, UNPACK);
if (!cmd.valident && !(ops->access & OP_VALID_ANYWAY))
return -ENOENT;
cpu_relax();
} while (cmd.valid && --retries);
if (cmd.valid)
return -ETIMEDOUT;
if (!cmd.valident && !(ops->access & OP_VALID_ANYWAY))
return -ENOENT;
/* Don't dereference possibly NULL pointer - maybe caller
* only wanted to see whether the entry existed or not.
@ -1316,8 +1353,16 @@ int sja1105_dynamic_config_write(struct sja1105_private *priv,
ops->entry_packing(packed_buf, entry, PACK);
/* Send SPI write operation: read config table entry */
mutex_lock(&priv->dynamic_config_lock);
rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf,
ops->packed_size);
if (rc < 0) {
mutex_unlock(&priv->dynamic_config_lock);
return rc;
}
rc = sja1105_dynamic_config_wait_complete(priv, &cmd, ops);
mutex_unlock(&priv->dynamic_config_lock);
if (rc < 0)
return rc;

View file

@ -3366,6 +3366,7 @@ static int sja1105_probe(struct spi_device *spi)
priv->ds = ds;
mutex_init(&priv->ptp_data.lock);
mutex_init(&priv->dynamic_config_lock);
mutex_init(&priv->mgmt_lock);
rc = sja1105_parse_dt(priv);

View file

@ -21,11 +21,13 @@ struct ocelot_mact_entry {
};
static inline u32 ocelot_mact_read_macaccess(struct ocelot *ocelot)
__must_hold(&ocelot->mact_lock)
{
return ocelot_read(ocelot, ANA_TABLES_MACACCESS);
}
static inline int ocelot_mact_wait_for_completion(struct ocelot *ocelot)
__must_hold(&ocelot->mact_lock)
{
u32 val;
@ -39,6 +41,7 @@ static inline int ocelot_mact_wait_for_completion(struct ocelot *ocelot)
static void ocelot_mact_select(struct ocelot *ocelot,
const unsigned char mac[ETH_ALEN],
unsigned int vid)
__must_hold(&ocelot->mact_lock)
{
u32 macl = 0, mach = 0;
@ -67,6 +70,7 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port,
ANA_TABLES_MACACCESS_ENTRYTYPE(type) |
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN);
unsigned int mc_ports;
int err;
/* Set MAC_CPU_COPY if the CPU port is used by a multicast entry */
if (type == ENTRYTYPE_MACv4)
@ -79,18 +83,28 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port,
if (mc_ports & BIT(ocelot->num_phys_ports))
cmd |= ANA_TABLES_MACACCESS_MAC_CPU_COPY;
mutex_lock(&ocelot->mact_lock);
ocelot_mact_select(ocelot, mac, vid);
/* Issue a write command */
ocelot_write(ocelot, cmd, ANA_TABLES_MACACCESS);
return ocelot_mact_wait_for_completion(ocelot);
err = ocelot_mact_wait_for_completion(ocelot);
mutex_unlock(&ocelot->mact_lock);
return err;
}
EXPORT_SYMBOL(ocelot_mact_learn);
int ocelot_mact_forget(struct ocelot *ocelot,
const unsigned char mac[ETH_ALEN], unsigned int vid)
{
int err;
mutex_lock(&ocelot->mact_lock);
ocelot_mact_select(ocelot, mac, vid);
/* Issue a forget command */
@ -98,7 +112,11 @@ int ocelot_mact_forget(struct ocelot *ocelot,
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_FORGET),
ANA_TABLES_MACACCESS);
return ocelot_mact_wait_for_completion(ocelot);
err = ocelot_mact_wait_for_completion(ocelot);
mutex_unlock(&ocelot->mact_lock);
return err;
}
EXPORT_SYMBOL(ocelot_mact_forget);
@ -114,7 +132,9 @@ static void ocelot_mact_init(struct ocelot *ocelot)
| ANA_AGENCTRL_LEARN_IGNORE_VLAN,
ANA_AGENCTRL);
/* Clear the MAC table */
/* Clear the MAC table. We are not concurrent with anyone, so
* holding &ocelot->mact_lock is pointless.
*/
ocelot_write(ocelot, MACACCESS_CMD_INIT, ANA_TABLES_MACACCESS);
}
@ -1172,6 +1192,7 @@ EXPORT_SYMBOL(ocelot_port_fdb_do_dump);
static int ocelot_mact_read(struct ocelot *ocelot, int port, int row, int col,
struct ocelot_mact_entry *entry)
__must_hold(&ocelot->mact_lock)
{
u32 val, dst, macl, mach;
char mac[ETH_ALEN];
@ -1220,33 +1241,40 @@ static int ocelot_mact_read(struct ocelot *ocelot, int port, int row, int col,
int ocelot_fdb_dump(struct ocelot *ocelot, int port,
dsa_fdb_dump_cb_t *cb, void *data)
{
int err = 0;
int i, j;
/* We could take the lock just around ocelot_mact_read, but doing so
* thousands of times in a row seems rather pointless and inefficient.
*/
mutex_lock(&ocelot->mact_lock);
/* Loop through all the mac tables entries. */
for (i = 0; i < ocelot->num_mact_rows; i++) {
for (j = 0; j < 4; j++) {
struct ocelot_mact_entry entry;
bool is_static;
int ret;
ret = ocelot_mact_read(ocelot, port, i, j, &entry);
err = ocelot_mact_read(ocelot, port, i, j, &entry);
/* If the entry is invalid (wrong port, invalid...),
* skip it.
*/
if (ret == -EINVAL)
if (err == -EINVAL)
continue;
else if (ret)
return ret;
else if (err)
break;
is_static = (entry.type == ENTRYTYPE_LOCKED);
ret = cb(entry.mac, entry.vid, is_static, data);
if (ret)
return ret;
err = cb(entry.mac, entry.vid, is_static, data);
if (err)
break;
}
}
return 0;
mutex_unlock(&ocelot->mact_lock);
return err;
}
EXPORT_SYMBOL(ocelot_fdb_dump);
@ -2231,6 +2259,7 @@ int ocelot_init(struct ocelot *ocelot)
mutex_init(&ocelot->stats_lock);
mutex_init(&ocelot->ptp_lock);
mutex_init(&ocelot->mact_lock);
spin_lock_init(&ocelot->ptp_clock_lock);
spin_lock_init(&ocelot->ts_id_lock);
snprintf(queue_name, sizeof(queue_name), "%s-stats",

View file

@ -287,6 +287,7 @@ struct dsa_port {
/* List of MAC addresses that must be forwarded on this port.
* These are only valid on CPU ports and DSA links.
*/
struct mutex addr_lists_lock;
struct list_head fdbs;
struct list_head mdbs;

View file

@ -675,6 +675,9 @@ struct ocelot {
struct delayed_work stats_work;
struct workqueue_struct *stats_queue;
/* Lock for serializing access to the MAC table */
struct mutex mact_lock;
struct workqueue_struct *owq;
u8 ptp:1;

View file

@ -433,6 +433,7 @@ static int dsa_port_setup(struct dsa_port *dp)
if (dp->setup)
return 0;
mutex_init(&dp->addr_lists_lock);
INIT_LIST_HEAD(&dp->fdbs);
INIT_LIST_HEAD(&dp->mdbs);

View file

@ -2413,7 +2413,6 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
dp = dsa_to_port(ds, switchdev_work->port);
rtnl_lock();
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
if (switchdev_work->host_addr)
@ -2448,7 +2447,6 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
break;
}
rtnl_unlock();
dev_put(switchdev_work->dev);
kfree(switchdev_work);

View file

@ -215,26 +215,30 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
int err;
int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_mdb_add(ds, port, mdb);
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
if (a) {
refcount_inc(&a->refcount);
return 0;
goto out;
}
a = kzalloc(sizeof(*a), GFP_KERNEL);
if (!a)
return -ENOMEM;
if (!a) {
err = -ENOMEM;
goto out;
}
err = ds->ops->port_mdb_add(ds, port, mdb);
if (err) {
kfree(a);
return err;
goto out;
}
ether_addr_copy(a->addr, mdb->addr);
@ -242,7 +246,10 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->mdbs);
return 0;
out:
mutex_unlock(&dp->addr_lists_lock);
return err;
}
static int dsa_port_do_mdb_del(struct dsa_port *dp,
@ -251,29 +258,36 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
int err;
int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_mdb_del(ds, port, mdb);
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
if (!a)
return -ENOENT;
if (!a) {
err = -ENOENT;
goto out;
}
if (!refcount_dec_and_test(&a->refcount))
return 0;
goto out;
err = ds->ops->port_mdb_del(ds, port, mdb);
if (err) {
refcount_inc(&a->refcount);
return err;
goto out;
}
list_del(&a->list);
kfree(a);
return 0;
out:
mutex_unlock(&dp->addr_lists_lock);
return err;
}
static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
@ -282,26 +296,30 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
int err;
int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_fdb_add(ds, port, addr, vid);
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
if (a) {
refcount_inc(&a->refcount);
return 0;
goto out;
}
a = kzalloc(sizeof(*a), GFP_KERNEL);
if (!a)
return -ENOMEM;
if (!a) {
err = -ENOMEM;
goto out;
}
err = ds->ops->port_fdb_add(ds, port, addr, vid);
if (err) {
kfree(a);
return err;
goto out;
}
ether_addr_copy(a->addr, addr);
@ -309,7 +327,10 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->fdbs);
return 0;
out:
mutex_unlock(&dp->addr_lists_lock);
return err;
}
static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
@ -318,29 +339,36 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
int port = dp->index;
int err;
int err = 0;
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
return ds->ops->port_fdb_del(ds, port, addr, vid);
mutex_lock(&dp->addr_lists_lock);
a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
if (!a)
return -ENOENT;
if (!a) {
err = -ENOENT;
goto out;
}
if (!refcount_dec_and_test(&a->refcount))
return 0;
goto out;
err = ds->ops->port_fdb_del(ds, port, addr, vid);
if (err) {
refcount_inc(&a->refcount);
return err;
goto out;
}
list_del(&a->list);
kfree(a);
return 0;
out:
mutex_unlock(&dp->addr_lists_lock);
return err;
}
static int dsa_switch_host_fdb_add(struct dsa_switch *ds,

View file

@ -0,0 +1,47 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Bridge FDB entries can be offloaded to DSA switches without holding the
# rtnl_mutex. Traditionally this mutex has conferred drivers implicit
# serialization, which means their code paths are not well tested in the
# presence of concurrency.
# This test creates a background task that stresses the FDB by adding and
# deleting an entry many times in a row without the rtnl_mutex held.
# It then tests the driver resistance to concurrency by calling .ndo_fdb_dump
# (with rtnl_mutex held) from a foreground task.
# Since either the FDB dump or the additions/removals can fail, but the
# additions and removals are performed in deferred as opposed to process
# context, we cannot simply check for user space error codes.
WAIT_TIME=1
NUM_NETIFS=1
REQUIRE_JQ="no"
REQUIRE_MZ="no"
NETIF_CREATE="no"
lib_dir=$(dirname $0)/../../../net/forwarding
source $lib_dir/lib.sh
cleanup() {
echo "Cleaning up"
kill $pid && wait $pid &> /dev/null
ip link del br0
echo "Please check kernel log for errors"
}
trap 'cleanup' EXIT
eth=${NETIFS[p1]}
ip link del br0 2&>1 >/dev/null || :
ip link add br0 type bridge && ip link set $eth master br0
(while :; do
bridge fdb add 00:01:02:03:04:05 dev $eth master static
bridge fdb del 00:01:02:03:04:05 dev $eth master static
done) &
pid=$!
for i in $(seq 1 50); do
bridge fdb show > /dev/null
sleep 3
echo "$((${i} * 2))% complete..."
done

View file

@ -23,6 +23,8 @@ MC_CLI=${MC_CLI:=smcroutectl}
PING_TIMEOUT=${PING_TIMEOUT:=5}
WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
REQUIRE_JQ=${REQUIRE_JQ:=yes}
REQUIRE_MZ=${REQUIRE_MZ:=yes}
relative_path="${BASH_SOURCE%/*}"
if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@ -141,8 +143,12 @@ require_command()
fi
}
require_command jq
require_command $MZ
if [[ "$REQUIRE_JQ" = "yes" ]]; then
require_command jq
fi
if [[ "$REQUIRE_MZ" = "yes" ]]; then
require_command $MZ
fi
if [[ ! -v NUM_NETIFS ]]; then
echo "SKIP: importer does not define \"NUM_NETIFS\""