diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index ef8cb62e82a1..4b59cf2c599f 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -656,3 +656,47 @@ Bridge offloads tracepoints: $ cat /sys/kernel/debug/tracing/trace ... ip-5387 [000] ...1 573713: mlx5_esw_bridge_vport_cleanup: vport_num=1 + +Eswitch QoS tracepoints: + +- mlx5_esw_vport_qos_create: trace creation of transmit scheduler arbiter for vport:: + + $ echo mlx5:mlx5_esw_vport_qos_create >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-23496 [018] .... 73136.838831: mlx5_esw_vport_qos_create: (0000:82:00.0) vport=2 tsar_ix=4 bw_share=0, max_rate=0 group=000000007b576bb3 + +- mlx5_esw_vport_qos_config: trace configuration of transmit scheduler arbiter for vport:: + + $ echo mlx5:mlx5_esw_vport_qos_config >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-26548 [023] .... 75754.223823: mlx5_esw_vport_qos_config: (0000:82:00.0) vport=1 tsar_ix=3 bw_share=34, max_rate=10000 group=000000007b576bb3 + +- mlx5_esw_vport_qos_destroy: trace deletion of transmit scheduler arbiter for vport:: + + $ echo mlx5:mlx5_esw_vport_qos_destroy >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-27418 [004] .... 76546.680901: mlx5_esw_vport_qos_destroy: (0000:82:00.0) vport=1 tsar_ix=3 + +- mlx5_esw_group_qos_create: trace creation of transmit scheduler arbiter for rate group:: + + $ echo mlx5:mlx5_esw_group_qos_create >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-26578 [008] .... 
75776.022112: mlx5_esw_group_qos_create: (0000:82:00.0) group=000000008dac63ea tsar_ix=5 + +- mlx5_esw_group_qos_config: trace configuration of transmit scheduler arbiter for rate group:: + + $ echo mlx5:mlx5_esw_group_qos_config >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-27303 [020] .... 76461.455356: mlx5_esw_group_qos_config: (0000:82:00.0) group=000000008dac63ea tsar_ix=5 bw_share=100 max_rate=20000 + +- mlx5_esw_group_qos_destroy: trace deletion of transmit scheduler arbiter for group:: + + $ echo mlx5:mlx5_esw_group_qos_destroy >> /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace + ... + <...>-27418 [006] .... 76547.187258: mlx5_esw_group_qos_destroy: (0000:82:00.0) group=000000007b576bb3 tsar_ix=1 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 4fccc9bc0328..63032cd6efb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -44,19 +44,22 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ lib/fs_chains.o en/tc_tun.o \ esw/indir_table.o en/tc_tun_encap.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ - en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o + en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \ + en/tc/post_act.o mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o +mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o # # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o esw/legacy.o + ecpf.o rdma.o esw/legacy.o \ + esw/devlink_port.o esw/vporttbl.o esw/qos.o + mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ - esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \ - esw/devlink_port.o esw/vporttbl.o -mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += esw/sample.o + esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o + 
mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 6f4d7c7f06e0..e84287ffc7ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -7,6 +7,7 @@ #include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" +#include "esw/qos.h" #include "sf/dev/dev.h" #include "sf/sf.h" @@ -292,6 +293,13 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get, .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set, + .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set, + .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set, + .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set, + .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set, + .rate_node_new = mlx5_esw_devlink_rate_node_new, + .rate_node_del = mlx5_esw_devlink_rate_node_del, + .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, #endif #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index e348c276eaa1..41684a6c44e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -7,6 +7,8 @@ #include "mod_hdr.h" #include "lib/fs_ttc.h" +struct mlx5e_post_act; + enum { MLX5E_TC_FT_LEVEL = 0, MLX5E_TC_TTC_FT_LEVEL, @@ -19,6 +21,7 @@ struct mlx5e_tc_table { struct mutex t_lock; struct mlx5_flow_table *t; struct mlx5_fs_chains *chains; + struct mlx5e_post_act *post_act; struct rhashtable ht; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c 
index 059799e4f483..51a4d80f7fa3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -17,7 +17,7 @@ #include "en/mapping.h" #include "en/tc_tun.h" #include "lib/port_tun.h" -#include "esw/sample.h" +#include "en/tc/sample.h" struct mlx5e_rep_indr_block_priv { struct net_device *netdev; @@ -516,7 +516,6 @@ void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) mlx5e_rep_indr_block_unbind); } -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, struct mlx5e_tc_update_priv *tc_priv, u32 tunnel_id) @@ -609,12 +608,13 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, return true; } -static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1, - struct mlx5e_tc_update_priv *tc_priv) +static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1, + struct mlx5e_tc_update_priv *tc_priv) { struct mlx5e_priv *priv = netdev_priv(skb->dev); u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) if (chain) { struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *uplink_rpriv; @@ -636,9 +636,25 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1, zone_restore_id)) return false; } +#endif /* CONFIG_NET_TC_SKB_EXT */ + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); } -#endif /* CONFIG_NET_TC_SKB_EXT */ + +static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv) +{ + if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) { + netdev_dbg(priv->netdev, + "Failed to restore tunnel info for sampled packet\n"); + return; + } +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + mlx5e_tc_sample_skb(skb, mapped_obj); +#endif /* CONFIG_MLX5_TC_SAMPLE */ + 
mlx5_rep_tc_post_napi_receive(tc_priv); +} bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, @@ -647,7 +663,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, struct mlx5_mapped_obj mapped_obj; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; - u32 reg_c0, reg_c1; + u32 reg_c0; int err; reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); @@ -659,8 +675,6 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, */ skb->mark = 0; - reg_c1 = be32_to_cpu(cqe->ft_metadata); - priv = netdev_priv(skb->dev); esw = priv->mdev->priv.eswitch; err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj); @@ -671,18 +685,14 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, return false; } -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) - return mlx5e_restore_skb(skb, mapped_obj.chain, reg_c1, tc_priv); -#endif /* CONFIG_NET_TC_SKB_EXT */ -#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) { - mlx5_esw_sample_skb(skb, &mapped_obj); + if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { + u32 reg_c1 = be32_to_cpu(cqe->ft_metadata); + + return mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, tc_priv); + } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) { + mlx5e_restore_skb_sample(priv, skb, &mapped_obj, tc_priv); return false; - } -#endif /* CONFIG_MLX5_TC_SAMPLE */ - if (mapped_obj.type != MLX5_MAPPED_OBJ_SAMPLE && - mapped_obj.type != MLX5_MAPPED_OBJ_CHAIN) { + } else { netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c new file mode 100644 index 000000000000..a3e43e898a56 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & 
AFFILIATES. All rights reserved. + +#include "en_tc.h" +#include "post_act.h" +#include "mlx5_core.h" + +struct mlx5e_post_act { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_fs_chains *chains; + struct mlx5_flow_table *ft; + struct mlx5e_priv *priv; + struct xarray ids; +}; + +struct mlx5e_post_act_handle { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_flow_attr *attr; + struct mlx5_flow_handle *rule; + u32 id; +}; + +#define MLX5_POST_ACTION_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen) +#define MLX5_POST_ACTION_MAX GENMASK(MLX5_POST_ACTION_BITS - 1, 0) +#define MLX5_POST_ACTION_MASK MLX5_POST_ACTION_MAX + +struct mlx5e_post_act * +mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5e_post_act *post_act; + int err; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ignore_flow_level)) { + mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + err = -EOPNOTSUPP; + goto err_check; + } else if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + err = -EOPNOTSUPP; + goto err_check; + } + + post_act = kzalloc(sizeof(*post_act), GFP_KERNEL); + if (!post_act) { + err = -ENOMEM; + goto err_check; + } + post_act->ft = mlx5_chains_create_global_table(chains); + if (IS_ERR(post_act->ft)) { + err = PTR_ERR(post_act->ft); + mlx5_core_warn(priv->mdev, "failed to create post action table, err: %d\n", err); + goto err_ft; + } + post_act->chains = chains; + post_act->ns_type = ns_type; + post_act->priv = priv; + xa_init_flags(&post_act->ids, XA_FLAGS_ALLOC1); + return post_act; + +err_ft: + kfree(post_act); +err_check: + return ERR_PTR(err); +} + +void +mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act) +{ + if (IS_ERR_OR_NULL(post_act)) + return; + + xa_destroy(&post_act->ids); + mlx5_chains_destroy_global_table(post_act->chains, 
post_act->ft); + kfree(post_act); +} + +struct mlx5e_post_act_handle * +mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr) +{ + u32 attr_sz = ns_to_attr_sz(post_act->ns_type); + struct mlx5e_post_act_handle *handle = NULL; + struct mlx5_flow_attr *post_attr = NULL; + struct mlx5_flow_spec *spec = NULL; + int err; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + post_attr = mlx5_alloc_flow_attr(post_act->ns_type); + if (!handle || !spec || !post_attr) { + kfree(post_attr); + kvfree(spec); + kfree(handle); + return ERR_PTR(-ENOMEM); + } + + memcpy(post_attr, attr, attr_sz); + post_attr->chain = 0; + post_attr->prio = 0; + post_attr->ft = post_act->ft; + post_attr->inner_match_level = MLX5_MATCH_NONE; + post_attr->outer_match_level = MLX5_MATCH_NONE; + post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + + handle->ns_type = post_act->ns_type; + /* Splits were handled before post action */ + if (handle->ns_type == MLX5_FLOW_NAMESPACE_FDB) + post_attr->esw_attr->split_count = 0; + + err = xa_alloc(&post_act->ids, &handle->id, post_attr, + XA_LIMIT(1, MLX5_POST_ACTION_MAX), GFP_KERNEL); + if (err) + goto err_xarray; + + /* Post action rule matches on fte_id and executes original rule's + * tc rule action + */ + mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, + handle->id, MLX5_POST_ACTION_MASK); + + handle->rule = mlx5_tc_rule_insert(post_act->priv, spec, post_attr); + if (IS_ERR(handle->rule)) { + err = PTR_ERR(handle->rule); + netdev_warn(post_act->priv->netdev, "Failed to add post action rule"); + goto err_rule; + } + handle->attr = post_attr; + + kvfree(spec); + return handle; + +err_rule: + xa_erase(&post_act->ids, handle->id); +err_xarray: + kfree(post_attr); + kvfree(spec); + kfree(handle); + return ERR_PTR(err); +} + +void +mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle) +{ + mlx5_tc_rule_delete(post_act->priv, handle->rule, 
handle->attr); + xa_erase(&post_act->ids, handle->id); + kfree(handle->attr); + kfree(handle); +} + +struct mlx5_flow_table * +mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act) +{ + return post_act->ft; +} + +/* Allocate a header modify action to write the post action handle fte id to a register. */ +int +mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev, + struct mlx5e_post_act_handle *handle, + struct mlx5e_tc_mod_hdr_acts *acts) +{ + return mlx5e_tc_match_to_reg_set(dev, acts, handle->ns_type, FTEID_TO_REG, handle->id); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h new file mode 100644 index 000000000000..b530ec1981a5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_POST_ACTION_H__ +#define __MLX5_POST_ACTION_H__ + +#include "en.h" +#include "lib/fs_chains.h" + +struct mlx5_flow_attr; +struct mlx5e_priv; +struct mlx5e_tc_mod_hdr_acts; + +struct mlx5e_post_act * +mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + enum mlx5_flow_namespace_type ns_type); + +void +mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act); + +struct mlx5e_post_act_handle * +mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr); + +void +mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle); + +struct mlx5_flow_table * +mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act); + +int +mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev, + struct mlx5e_post_act_handle *handle, + struct mlx5e_tc_mod_hdr_acts *acts); + +#endif /* __MLX5_POST_ACTION_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c similarity index 
53% rename from drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c rename to drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index d3ad78aa9d45..6552ecee3f9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -4,7 +4,8 @@ #include #include #include "en/mapping.h" -#include "esw/sample.h" +#include "en/tc/post_act.h" +#include "sample.h" #include "eswitch.h" #include "en_tc.h" #include "fs_core.h" @@ -17,17 +18,18 @@ static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = { .flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP, }; -struct mlx5_esw_psample { - struct mlx5e_priv *priv; +struct mlx5e_tc_psample { + struct mlx5_eswitch *esw; struct mlx5_flow_table *termtbl; struct mlx5_flow_handle *termtbl_rule; DECLARE_HASHTABLE(hashtbl, 8); struct mutex ht_lock; /* protect hashtbl */ DECLARE_HASHTABLE(restore_hashtbl, 8); struct mutex restore_lock; /* protect restore_hashtbl */ + struct mlx5e_post_act *post_act; }; -struct mlx5_sampler { +struct mlx5e_sampler { struct hlist_node hlist; u32 sampler_id; u32 sample_ratio; @@ -36,29 +38,32 @@ struct mlx5_sampler { int count; }; -struct mlx5_sample_flow { - struct mlx5_sampler *sampler; - struct mlx5_sample_restore *restore; +struct mlx5e_sample_flow { + struct mlx5e_sampler *sampler; + struct mlx5e_sample_restore *restore; struct mlx5_flow_attr *pre_attr; struct mlx5_flow_handle *pre_rule; - struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *post_attr; + struct mlx5_flow_handle *post_rule; + struct mlx5e_post_act_handle *post_act_handle; }; -struct mlx5_sample_restore { +struct mlx5e_sample_restore { struct hlist_node hlist; struct mlx5_modify_hdr *modify_hdr; struct mlx5_flow_handle *rule; + struct mlx5e_post_act_handle *post_act_handle; u32 obj_id; int count; }; static int -sampler_termtbl_create(struct mlx5_esw_psample *esw_psample) +sampler_termtbl_create(struct mlx5e_tc_psample 
*tc_psample) { - struct mlx5_core_dev *dev = esw_psample->priv->mdev; - struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_eswitch *esw = tc_psample->esw; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_destination dest = {}; + struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_act act = {}; int err; @@ -79,20 +84,20 @@ sampler_termtbl_create(struct mlx5_esw_psample *esw_psample) ft_attr.prio = FDB_SLOW_PATH; ft_attr.max_fte = 1; ft_attr.level = 1; - esw_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); - if (IS_ERR(esw_psample->termtbl)) { - err = PTR_ERR(esw_psample->termtbl); + tc_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(tc_psample->termtbl)) { + err = PTR_ERR(tc_psample->termtbl); mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err); return err; } act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; dest.vport.num = esw->manager_vport; - esw_psample->termtbl_rule = mlx5_add_flow_rules(esw_psample->termtbl, NULL, &act, &dest, 1); - if (IS_ERR(esw_psample->termtbl_rule)) { - err = PTR_ERR(esw_psample->termtbl_rule); + tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1); + if (IS_ERR(tc_psample->termtbl_rule)) { + err = PTR_ERR(tc_psample->termtbl_rule); mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err); - mlx5_destroy_flow_table(esw_psample->termtbl); + mlx5_destroy_flow_table(tc_psample->termtbl); return err; } @@ -100,14 +105,14 @@ sampler_termtbl_create(struct mlx5_esw_psample *esw_psample) } static void -sampler_termtbl_destroy(struct mlx5_esw_psample *esw_psample) +sampler_termtbl_destroy(struct mlx5e_tc_psample *tc_psample) { - mlx5_del_flow_rules(esw_psample->termtbl_rule); - mlx5_destroy_flow_table(esw_psample->termtbl); + mlx5_del_flow_rules(tc_psample->termtbl_rule); + mlx5_destroy_flow_table(tc_psample->termtbl); } static int -sampler_obj_create(struct 
mlx5_core_dev *mdev, struct mlx5_sampler *sampler) +sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5e_sampler *sampler) { u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; @@ -163,16 +168,16 @@ sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 def return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2; } -static struct mlx5_sampler * -sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_table_id) +static struct mlx5e_sampler * +sampler_get(struct mlx5e_tc_psample *tc_psample, u32 sample_ratio, u32 default_table_id) { - struct mlx5_sampler *sampler; + struct mlx5e_sampler *sampler; u32 hash_key; int err; - mutex_lock(&esw_psample->ht_lock); + mutex_lock(&tc_psample->ht_lock); hash_key = sampler_hash(sample_ratio, default_table_id); - hash_for_each_possible(esw_psample->hashtbl, sampler, hlist, hash_key) + hash_for_each_possible(tc_psample->hashtbl, sampler, hlist, hash_key) if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id, sample_ratio, default_table_id)) goto add_ref; @@ -183,42 +188,49 @@ sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_ goto err_alloc; } - sampler->sample_table_id = esw_psample->termtbl->id; + sampler->sample_table_id = tc_psample->termtbl->id; sampler->default_table_id = default_table_id; sampler->sample_ratio = sample_ratio; - err = sampler_obj_create(esw_psample->priv->mdev, sampler); + err = sampler_obj_create(tc_psample->esw->dev, sampler); if (err) goto err_create; - hash_add(esw_psample->hashtbl, &sampler->hlist, hash_key); + hash_add(tc_psample->hashtbl, &sampler->hlist, hash_key); add_ref: sampler->count++; - mutex_unlock(&esw_psample->ht_lock); + mutex_unlock(&tc_psample->ht_lock); return sampler; err_create: kfree(sampler); err_alloc: - mutex_unlock(&esw_psample->ht_lock); + mutex_unlock(&tc_psample->ht_lock); return ERR_PTR(err); } static void 
-sampler_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sampler *sampler) +sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler) { - mutex_lock(&esw_psample->ht_lock); + mutex_lock(&tc_psample->ht_lock); if (--sampler->count == 0) { hash_del(&sampler->hlist); - sampler_obj_destroy(esw_psample->priv->mdev, sampler->sampler_id); + sampler_obj_destroy(tc_psample->esw->dev, sampler->sampler_id); kfree(sampler); } - mutex_unlock(&esw_psample->ht_lock); + mutex_unlock(&tc_psample->ht_lock); } +/* obj_id is used to restore the sample parameters. + * Set fte_id in original flow table, then match it in the default table. + * Only set it for NICs that can preserve reg_c, or for flows with a decap + * action. For other cases, use the same match in the default table. + * Use one header rewrite for both obj_id and fte_id. + */ static struct mlx5_modify_hdr * -sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id) +sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id, + struct mlx5e_post_act_handle *handle) { struct mlx5e_tc_mod_hdr_acts mod_acts = {}; struct mlx5_modify_hdr *modify_hdr; @@ -229,6 +241,12 @@ sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id) if (err) goto err_set_regc0; + if (handle) { + err = mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts); + if (err) + goto err_post_act; + } + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, mod_acts.num_actions, mod_acts.actions); @@ -241,23 +259,40 @@ sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id) return modify_hdr; err_modify_hdr: +err_post_act: dealloc_mod_hdr_actions(&mod_acts); err_set_regc0: return ERR_PTR(err); } -static struct mlx5_sample_restore * -sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id) +static u32 +restore_hash(u32 obj_id, struct mlx5e_post_act_handle *post_act_handle) { - struct mlx5_core_dev *mdev = esw_psample->priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_sample_restore 
*restore; + return jhash_2words(obj_id, hash32_ptr(post_act_handle), 0); +} + +static bool +restore_equal(struct mlx5e_sample_restore *restore, u32 obj_id, + struct mlx5e_post_act_handle *post_act_handle) +{ + return restore->obj_id == obj_id && restore->post_act_handle == post_act_handle; +} + +static struct mlx5e_sample_restore * +sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, + struct mlx5e_post_act_handle *post_act_handle) +{ + struct mlx5_eswitch *esw = tc_psample->esw; + struct mlx5_core_dev *mdev = esw->dev; + struct mlx5e_sample_restore *restore; struct mlx5_modify_hdr *modify_hdr; + u32 hash_key; int err; - mutex_lock(&esw_psample->restore_lock); - hash_for_each_possible(esw_psample->restore_hashtbl, restore, hlist, obj_id) - if (restore->obj_id == obj_id) + mutex_lock(&tc_psample->restore_lock); + hash_key = restore_hash(obj_id, post_act_handle); + hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, hash_key) + if (restore_equal(restore, obj_id, post_act_handle)) goto add_ref; restore = kzalloc(sizeof(*restore), GFP_KERNEL); @@ -266,8 +301,9 @@ sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id) goto err_alloc; } restore->obj_id = obj_id; + restore->post_act_handle = post_act_handle; - modify_hdr = sample_metadata_rule_get(mdev, obj_id); + modify_hdr = sample_modify_hdr_get(mdev, obj_id, post_act_handle); if (IS_ERR(modify_hdr)) { err = PTR_ERR(modify_hdr); goto err_modify_hdr; @@ -280,10 +316,10 @@ sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id) goto err_restore; } - hash_add(esw_psample->restore_hashtbl, &restore->hlist, obj_id); + hash_add(tc_psample->restore_hashtbl, &restore->hlist, hash_key); add_ref: restore->count++; - mutex_unlock(&esw_psample->restore_lock); + mutex_unlock(&tc_psample->restore_lock); return restore; err_restore: @@ -291,26 +327,26 @@ err_restore: err_modify_hdr: kfree(restore); err_alloc: - mutex_unlock(&esw_psample->restore_lock); + 
mutex_unlock(&tc_psample->restore_lock); return ERR_PTR(err); } static void -sample_restore_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sample_restore *restore) +sample_restore_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_restore *restore) { - mutex_lock(&esw_psample->restore_lock); + mutex_lock(&tc_psample->restore_lock); if (--restore->count == 0) hash_del(&restore->hlist); - mutex_unlock(&esw_psample->restore_lock); + mutex_unlock(&tc_psample->restore_lock); if (!restore->count) { mlx5_del_flow_rules(restore->rule); - mlx5_modify_header_dealloc(esw_psample->priv->mdev, restore->modify_hdr); + mlx5_modify_header_dealloc(tc_psample->esw->dev, restore->modify_hdr); kfree(restore); } } -void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) +void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) { u32 trunc_size = mapped_obj->sample.trunc_size; struct psample_group psample_group = {}; @@ -325,6 +361,87 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md); } +static int +add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, + struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, + u32 *default_tbl_id) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + u32 attr_sz = ns_to_attr_sz(MLX5_FLOW_NAMESPACE_FDB); + struct mlx5_vport_tbl_attr per_vport_tbl_attr; + struct mlx5_flow_table *default_tbl; + struct mlx5_flow_attr *post_attr; + int err; + + /* Allocate default table per vport, chain and prio. Otherwise, there is + * only one default table for the same sampler object. Rules with different + * prio and chain may overlap. For CT sample action, per vport default + * table is needed to restore the metadata. 
+ */ + per_vport_tbl_attr.chain = attr->chain; + per_vport_tbl_attr.prio = attr->prio; + per_vport_tbl_attr.vport = esw_attr->in_rep->vport; + per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr); + if (IS_ERR(default_tbl)) { + err = PTR_ERR(default_tbl); + goto err_default_tbl; + } + *default_tbl_id = default_tbl->id; + + post_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!post_attr) { + err = -ENOMEM; + goto err_attr; + } + sample_flow->post_attr = post_attr; + memcpy(post_attr, attr, attr_sz); + /* Perform the original matches on the default table. + * Offload all actions except the sample action. + */ + post_attr->chain = 0; + post_attr->prio = 0; + post_attr->ft = default_tbl; + post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + + /* When offloading sample and encap action, if there is no valid + * neigh data struct, a slow path rule is offloaded first. Source + * port metadata match is set at that time. A per vport table is + * already allocated. No need to match it again. So clear the source + * port metadata match. 
+ */ + mlx5_eswitch_clear_rule_source_port(esw, spec); + sample_flow->post_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, post_attr); + if (IS_ERR(sample_flow->post_rule)) { + err = PTR_ERR(sample_flow->post_rule); + goto err_rule; + } + return 0; + +err_rule: + kfree(post_attr); +err_attr: + mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr); +err_default_tbl: + return err; +} + +static void +del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_vport_tbl_attr tbl_attr; + + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, sample_flow->post_attr); + kfree(sample_flow->post_attr); + tbl_attr.chain = attr->chain; + tbl_attr.prio = attr->prio; + tbl_attr.vport = esw_attr->in_rep->vport; + tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + mlx5_esw_vporttbl_put(esw, &tbl_attr); +} + /* For the following typical flow table: * * +-------------------------------+ @@ -342,8 +459,9 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj * +---------------------+ * + original match + * +---------------------+ - * | - * v + * | set fte_id (if reg_c preserve cap) + * | do decap (if required) + * v * +------------------------------------------------+ * + Flow Sampler Object + * +------------------------------------------------+ @@ -353,80 +471,82 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj * +------------------------------------------------+ * | | * v v - * +-----------------------------+ +----------------------------------------+ - * + sample table + + default table per + - * +-----------------------------+ +----------------------------------------+ - * + forward to management vport + + original match + - * +-----------------------------+ +----------------------------------------+ - * + other actions + - * +----------------------------------------+ + * 
+-----------------------------+ +-------------------+ + * + sample table + + default table + + * +-----------------------------+ +-------------------+ + * + forward to management vport + | + * +-----------------------------+ | + * +-------+------+ + * | |reg_c preserve cap + * | |or decap action + * v v + * +-----------------+ +-------------+ + * + per vport table + + post action + + * +-----------------+ +-------------+ + * + original match + + * +-----------------+ + * + other actions + + * +-----------------+ */ struct mlx5_flow_handle * -mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample, +mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr) + struct mlx5_flow_attr *attr, + u32 tunnel_id) { + struct mlx5e_post_act_handle *post_act_handle = NULL; struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - struct mlx5_vport_tbl_attr per_vport_tbl_attr; struct mlx5_esw_flow_attr *pre_esw_attr; struct mlx5_mapped_obj restore_obj = {}; - struct mlx5_sample_flow *sample_flow; - struct mlx5_sample_attr *sample_attr; - struct mlx5_flow_table *default_tbl; + struct mlx5e_sample_flow *sample_flow; + struct mlx5e_sample_attr *sample_attr; struct mlx5_flow_attr *pre_attr; struct mlx5_eswitch *esw; + u32 default_tbl_id; u32 obj_id; int err; - if (IS_ERR_OR_NULL(esw_psample)) + if (IS_ERR_OR_NULL(tc_psample)) return ERR_PTR(-EOPNOTSUPP); /* If slow path flag is set, eg. when the neigh is invalid for encap, * don't offload sample action. */ - esw = esw_psample->priv->mdev->priv.eswitch; + esw = tc_psample->esw; if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); if (!sample_flow) return ERR_PTR(-ENOMEM); - esw_attr->sample->sample_flow = sample_flow; + sample_attr = attr->sample_attr; + sample_attr->sample_flow = sample_flow; - /* Allocate default table per vport, chain and prio. 
Otherwise, there is - * only one default table for the same sampler object. Rules with different - * prio and chain may overlap. For CT sample action, per vport default - * table is needed to resotre the metadata. + /* For NICs with reg_c_preserve support or decap action, use + * post action instead of the per vport, chain and prio table. + * Only match the fte id instead of the same match in the + * original flow table. */ - per_vport_tbl_attr.chain = attr->chain; - per_vport_tbl_attr.prio = attr->prio; - per_vport_tbl_attr.vport = esw_attr->in_rep->vport; - per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; - default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr); - if (IS_ERR(default_tbl)) { - err = PTR_ERR(default_tbl); - goto err_default_tbl; - } + if (MLX5_CAP_GEN(esw->dev, reg_c_preserve) || + attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + struct mlx5_flow_table *ft; - /* Perform the original matches on the default table. - * Offload all actions except the sample action. - */ - esw_attr->sample->sample_default_tbl = default_tbl; - /* When offloading sample and encap action, if there is no valid - * neigh data struct, a slow path rule is offloaded first. Source - * port metadata match is set at that time. A per vport table is - * already allocated. No need to match it again. So clear the source - * port metadata match. 
- */ - mlx5_eswitch_clear_rule_source_port(esw, spec); - sample_flow->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); - if (IS_ERR(sample_flow->rule)) { - err = PTR_ERR(sample_flow->rule); - goto err_offload_rule; + ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act); + default_tbl_id = ft->id; + post_act_handle = mlx5e_tc_post_act_add(tc_psample->post_act, attr); + if (IS_ERR(post_act_handle)) { + err = PTR_ERR(post_act_handle); + goto err_post_act; + } + sample_flow->post_act_handle = post_act_handle; + } else { + err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id); + if (err) + goto err_post_rule; } /* Create sampler object. */ - sample_flow->sampler = sampler_get(esw_psample, esw_attr->sample->rate, default_tbl->id); + sample_flow->sampler = sampler_get(tc_psample, sample_attr->rate, default_tbl_id); if (IS_ERR(sample_flow->sampler)) { err = PTR_ERR(sample_flow->sampler); goto err_sampler; @@ -434,16 +554,17 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample, /* Create an id mapping reg_c0 value to sample object. */ restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; - restore_obj.sample.group_id = esw_attr->sample->group_num; - restore_obj.sample.rate = esw_attr->sample->rate; - restore_obj.sample.trunc_size = esw_attr->sample->trunc_size; + restore_obj.sample.group_id = sample_attr->group_num; + restore_obj.sample.rate = sample_attr->rate; + restore_obj.sample.trunc_size = sample_attr->trunc_size; + restore_obj.sample.tunnel_id = tunnel_id; err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id); if (err) goto err_obj_id; - esw_attr->sample->restore_obj_id = obj_id; + sample_attr->restore_obj_id = obj_id; /* Create sample restore context. 
*/ - sample_flow->restore = sample_restore_get(esw_psample, obj_id); + sample_flow->restore = sample_restore_get(tc_psample, obj_id, post_act_handle); if (IS_ERR(sample_flow->restore)) { err = PTR_ERR(sample_flow->restore); goto err_sample_restore; @@ -455,21 +576,23 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample, pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); if (!pre_attr) { err = -ENOMEM; - goto err_alloc_flow_attr; + goto err_alloc_pre_flow_attr; } - sample_attr = kzalloc(sizeof(*sample_attr), GFP_KERNEL); - if (!sample_attr) { - err = -ENOMEM; - goto err_alloc_sample_attr; - } - pre_esw_attr = pre_attr->esw_attr; pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + /* For decap action, do decap in the original flow table instead of the + * default flow table. + */ + if (tunnel_id) + pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; pre_attr->modify_hdr = sample_flow->restore->modify_hdr; pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE; + pre_attr->inner_match_level = attr->inner_match_level; + pre_attr->outer_match_level = attr->outer_match_level; pre_attr->chain = attr->chain; pre_attr->prio = attr->prio; - pre_esw_attr->sample = sample_attr; - pre_esw_attr->sample->sampler_id = sample_flow->sampler->sampler_id; + pre_attr->sample_attr = attr->sample_attr; + sample_attr->sampler_id = sample_flow->sampler->sampler_id; + pre_esw_attr = pre_attr->esw_attr; pre_esw_attr->in_mdev = esw_attr->in_mdev; pre_esw_attr->in_rep = esw_attr->in_rep; sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr); @@ -479,108 +602,113 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample, } sample_flow->pre_attr = pre_attr; - return sample_flow->rule; + return sample_flow->post_rule; err_pre_offload_rule: - kfree(sample_attr); -err_alloc_sample_attr: kfree(pre_attr); -err_alloc_flow_attr: - sample_restore_put(esw_psample, sample_flow->restore); +err_alloc_pre_flow_attr: + 
sample_restore_put(tc_psample, sample_flow->restore); err_sample_restore: mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id); err_obj_id: - sampler_put(esw_psample, sample_flow->sampler); + sampler_put(tc_psample, sample_flow->sampler); err_sampler: - /* For sample offload, rule is added in default_tbl. No need to call - * mlx5_esw_chains_put_table() - */ - attr->prio = 0; - attr->chain = 0; - mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr); -err_offload_rule: - mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr); -err_default_tbl: + if (!post_act_handle) + del_post_rule(esw, sample_flow, attr); +err_post_rule: + if (post_act_handle) + mlx5e_tc_post_act_del(tc_psample->post_act, post_act_handle); +err_post_act: kfree(sample_flow); return ERR_PTR(err); } void -mlx5_esw_sample_unoffload(struct mlx5_esw_psample *esw_psample, +mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_handle *rule, struct mlx5_flow_attr *attr) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - struct mlx5_sample_flow *sample_flow; + struct mlx5e_sample_flow *sample_flow; struct mlx5_vport_tbl_attr tbl_attr; - struct mlx5_flow_attr *pre_attr; struct mlx5_eswitch *esw; - if (IS_ERR_OR_NULL(esw_psample)) + if (IS_ERR_OR_NULL(tc_psample)) return; /* If slow path flag is set, sample action is not offloaded. * No need to delete sample rule. */ - esw = esw_psample->priv->mdev->priv.eswitch; + esw = tc_psample->esw; if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { mlx5_eswitch_del_offloaded_rule(esw, rule, attr); return; } - sample_flow = esw_attr->sample->sample_flow; - pre_attr = sample_flow->pre_attr; - memset(pre_attr, 0, sizeof(*pre_attr)); - esw = esw_psample->priv->mdev->priv.eswitch; - mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, pre_attr); - mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr); + /* The following delete order can't be changed, otherwise, + * will hit fw syndromes. 
+ */ + sample_flow = attr->sample_attr->sample_flow; + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); + if (!sample_flow->post_act_handle) + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, + sample_flow->post_attr); - sample_restore_put(esw_psample, sample_flow->restore); - mapping_remove(esw->offloads.reg_c0_obj_pool, esw_attr->sample->restore_obj_id); - sampler_put(esw_psample, sample_flow->sampler); - tbl_attr.chain = attr->chain; - tbl_attr.prio = attr->prio; - tbl_attr.vport = esw_attr->in_rep->vport; - tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; - mlx5_esw_vporttbl_put(esw, &tbl_attr); + sample_restore_put(tc_psample, sample_flow->restore); + mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id); + sampler_put(tc_psample, sample_flow->sampler); + if (sample_flow->post_act_handle) { + mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle); + } else { + tbl_attr.chain = attr->chain; + tbl_attr.prio = attr->prio; + tbl_attr.vport = esw_attr->in_rep->vport; + tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + mlx5_esw_vporttbl_put(esw, &tbl_attr); + kfree(sample_flow->post_attr); + } - kfree(pre_attr->esw_attr->sample); - kfree(pre_attr); + kfree(sample_flow->pre_attr); kfree(sample_flow); } -struct mlx5_esw_psample * -mlx5_esw_sample_init(struct mlx5e_priv *priv) +struct mlx5e_tc_psample * +mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act) { - struct mlx5_esw_psample *esw_psample; + struct mlx5e_tc_psample *tc_psample; int err; - esw_psample = kzalloc(sizeof(*esw_psample), GFP_KERNEL); - if (!esw_psample) + tc_psample = kzalloc(sizeof(*tc_psample), GFP_KERNEL); + if (!tc_psample) return ERR_PTR(-ENOMEM); - esw_psample->priv = priv; - err = sampler_termtbl_create(esw_psample); + if (IS_ERR_OR_NULL(post_act)) { + err = PTR_ERR(post_act); + goto err_post_act; + } + tc_psample->post_act = post_act; + tc_psample->esw = esw; + err = 
sampler_termtbl_create(tc_psample); if (err) - goto err_termtbl; + goto err_post_act; - mutex_init(&esw_psample->ht_lock); - mutex_init(&esw_psample->restore_lock); + mutex_init(&tc_psample->ht_lock); + mutex_init(&tc_psample->restore_lock); - return esw_psample; + return tc_psample; -err_termtbl: - kfree(esw_psample); +err_post_act: + kfree(tc_psample); return ERR_PTR(err); } void -mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample) +mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) { - if (IS_ERR_OR_NULL(esw_psample)) + if (IS_ERR_OR_NULL(tc_psample)) return; - mutex_destroy(&esw_psample->restore_lock); - mutex_destroy(&esw_psample->ht_lock); - sampler_termtbl_destroy(esw_psample); - kfree(esw_psample); + mutex_destroy(&tc_psample->restore_lock); + mutex_destroy(&tc_psample->ht_lock); + sampler_termtbl_destroy(tc_psample); + kfree(tc_psample); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h new file mode 100644 index 000000000000..db0146df9b30 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_TC_SAMPLE_H__ +#define __MLX5_EN_TC_SAMPLE_H__ + +#include "eswitch.h" + +struct mlx5_flow_attr; +struct mlx5e_tc_psample; +struct mlx5e_post_act; + +struct mlx5e_sample_attr { + u32 group_num; + u32 rate; + u32 trunc_size; + u32 restore_obj_id; + u32 sampler_id; + struct mlx5e_sample_flow *sample_flow; +}; + +void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj); + +struct mlx5_flow_handle * +mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + u32 tunnel_id); + +void +mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5e_tc_psample * +mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act); + +void +mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample); + +#endif /* __MLX5_EN_TC_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index b1707b86aa16..6c949abcd2e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -19,6 +19,7 @@ #include "en/tc_ct.h" #include "en/mod_hdr.h" #include "en/mapping.h" +#include "en/tc/post_act.h" #include "en.h" #include "en_tc.h" #include "en_rep.h" @@ -32,10 +33,6 @@ #define MLX5_CT_STATE_RELATED_BIT BIT(5) #define MLX5_CT_STATE_INVALID_BIT BIT(6) -#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen) -#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) -#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX - #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen) #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0) @@ -46,14 +43,13 @@ struct mlx5_tc_ct_priv { struct mlx5_core_dev *dev; const struct net_device *netdev; struct mod_hdr_tbl *mod_hdr_tbl; - struct idr fte_ids; struct xarray 
tuple_ids; struct rhashtable zone_ht; struct rhashtable ct_tuples_ht; struct rhashtable ct_tuples_nat_ht; struct mlx5_flow_table *ct; struct mlx5_flow_table *ct_nat; - struct mlx5_flow_table *post_ct; + struct mlx5e_post_act *post_act; struct mutex control_lock; /* guards parallel adds/dels */ struct mapping_ctx *zone_mapping; struct mapping_ctx *labels_mapping; @@ -64,11 +60,9 @@ struct mlx5_tc_ct_priv { struct mlx5_ct_flow { struct mlx5_flow_attr *pre_ct_attr; - struct mlx5_flow_attr *post_ct_attr; struct mlx5_flow_handle *pre_ct_rule; - struct mlx5_flow_handle *post_ct_rule; + struct mlx5e_post_act_handle *post_act_handle; struct mlx5_ct_ft *ft; - u32 fte_id; u32 chain_mapping; }; @@ -768,7 +762,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = 0; - attr->dest_ft = ct_priv->post_ct; + attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; attr->outer_match_level = MLX5_MATCH_L4; attr->counter = entry->counter->counter; @@ -1432,7 +1426,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, ctstate |= MLX5_CT_STATE_NAT_BIT; mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); - dest.ft = ct_priv->post_ct; + dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1716,9 +1710,9 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * | do decap * v * +---------------------+ - * + pre_ct/pre_ct_nat + if matches +---------------------+ - * + zone+nat match +---------------->+ post_ct (see below) + - * +---------------------+ set zone +---------------------+ + * + pre_ct/pre_ct_nat + if matches +-------------------------+ + * + zone+nat match +---------------->+ post_act (see below) + + * +---------------------+ set zone +-------------------------+ * | set zone * 
v * +--------------------+ @@ -1732,7 +1726,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * | do nat (if needed) * v * +--------------+ - * + post_ct + original filter actions + * + post_act + original filter actions * + fte_id match +------------------------> * +--------------+ */ @@ -1746,19 +1740,15 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); - struct mlx5_flow_spec *post_ct_spec = NULL; + struct mlx5e_post_act_handle *handle; struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; - struct mlx5_flow_handle *rule; struct mlx5_ct_flow *ct_flow; int chain_mapping = 0, err; struct mlx5_ct_ft *ft; - u32 fte_id = 1; - post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL); ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); - if (!post_ct_spec || !ct_flow) { - kvfree(post_ct_spec); + if (!ct_flow) { kfree(ct_flow); return ERR_PTR(-ENOMEM); } @@ -1773,14 +1763,13 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } ct_flow->ft = ft; - err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id, - MLX5_FTE_ID_MAX, GFP_KERNEL); - if (err) { - netdev_warn(priv->netdev, - "Failed to allocate fte id, err: %d\n", err); - goto err_idr; + handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + ct_dbg("Failed to allocate post action handle"); + goto err_post_act_handle; } - ct_flow->fte_id = fte_id; + ct_flow->post_act_handle = handle; /* Base flow attributes of both rules on original rule attribute */ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); @@ -1789,15 +1778,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, goto err_alloc_pre; } - ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); - if (!ct_flow->post_ct_attr) { - err = -ENOMEM; - goto err_alloc_post; - 
} - pre_ct_attr = ct_flow->pre_ct_attr; memcpy(pre_ct_attr, attr, attr_sz); - memcpy(ct_flow->post_ct_attr, attr, attr_sz); /* Modify the original rule's action to fwd and modify, leave decap */ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; @@ -1823,10 +1805,9 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, goto err_mapping; } - err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, - FTEID_TO_REG, fte_id); + err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts); if (err) { - ct_dbg("Failed to set fte_id register mapping"); + ct_dbg("Failed to set post action handle"); goto err_mapping; } @@ -1857,33 +1838,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } pre_ct_attr->modify_hdr = mod_hdr; - /* Post ct rule matches on fte_id and executes original rule's - * tc rule action - */ - mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, - fte_id, MLX5_FTE_ID_MASK); - - /* Put post_ct rule on post_ct flow table */ - ct_flow->post_ct_attr->chain = 0; - ct_flow->post_ct_attr->prio = 0; - ct_flow->post_ct_attr->ft = ct_priv->post_ct; - - /* Splits were handled before CT */ - if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) - ct_flow->post_ct_attr->esw_attr->split_count = 0; - - ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE; - ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE; - ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); - rule = mlx5_tc_rule_insert(priv, post_ct_spec, - ct_flow->post_ct_attr); - ct_flow->post_ct_rule = rule; - if (IS_ERR(ct_flow->post_ct_rule)) { - err = PTR_ERR(ct_flow->post_ct_rule); - ct_dbg("Failed to add post ct rule"); - goto err_insert_post_ct; - } - /* Change original rule point to ct table */ pre_ct_attr->dest_chain = 0; pre_ct_attr->dest_ft = nat ? 
ft->pre_ct_nat.ft : ft->pre_ct.ft; @@ -1897,28 +1851,21 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, attr->ct_attr.ct_flow = ct_flow; dealloc_mod_hdr_actions(&pre_mod_acts); - kvfree(post_ct_spec); - return rule; + return ct_flow->pre_ct_rule; err_insert_orig: - mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule, - ct_flow->post_ct_attr); -err_insert_post_ct: mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: dealloc_mod_hdr_actions(&pre_mod_acts); mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); err_get_chain: - kfree(ct_flow->post_ct_attr); -err_alloc_post: kfree(ct_flow->pre_ct_attr); err_alloc_pre: - idr_remove(&ct_priv->fte_ids, fte_id); -err_idr: + mlx5e_tc_post_act_del(ct_priv->post_act, handle); +err_post_act_handle: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: - kvfree(post_ct_spec); kfree(ct_flow); netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); return ERR_PTR(err); @@ -2029,16 +1976,13 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, pre_ct_attr); mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); - if (ct_flow->post_ct_rule) { - mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule, - ct_flow->post_ct_attr); + if (ct_flow->post_act_handle) { mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); - idr_remove(&ct_priv->fte_ids, ct_flow->fte_id); + mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle); mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); } kfree(ct_flow->pre_ct_attr); - kfree(ct_flow->post_ct_attr); kfree(ct_flow); } @@ -2064,11 +2008,6 @@ static int mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, const char **err_msg) { - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) { - *err_msg = "firmware level support is missing"; - return -EOPNOTSUPP; - } - if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { /* vlan workaround should be avoided for multi chain rules. 
* This is just a sanity check as pop vlan action should @@ -2097,21 +2036,10 @@ mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, return 0; } -static int -mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv, - const char **err_msg) -{ - if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { - *err_msg = "firmware level support is missing"; - return -EOPNOTSUPP; - } - - return 0; -} - static int mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, const char **err_msg) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -2122,10 +2050,14 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, *err_msg = "tc skb extension missing"; return -EOPNOTSUPP; #endif + if (IS_ERR_OR_NULL(post_act)) { + *err_msg = "tc ct offload not supported, post action is missing"; + return -EOPNOTSUPP; + } + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) return mlx5_tc_ct_init_check_esw_support(esw, err_msg); - else - return mlx5_tc_ct_init_check_nic_support(priv, err_msg); + return 0; } #define INIT_ERR_PREFIX "tc ct offload init failed" @@ -2133,7 +2065,8 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, struct mlx5_tc_ct_priv * mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, struct mod_hdr_tbl *mod_hdr, - enum mlx5_flow_namespace_type ns_type) + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) { struct mlx5_tc_ct_priv *ct_priv; struct mlx5_core_dev *dev; @@ -2142,11 +2075,9 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, int err; dev = priv->mdev; - err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg); + err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act, &msg); if (err) { - mlx5_core_warn(dev, - "tc ct offload not supported, %s\n", - msg); + mlx5_core_warn(dev, "tc ct offload not supported, %s\n", msg); goto err_support; } @@ -2194,16 +2125,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, 
struct mlx5_fs_chains *chains, goto err_ct_nat_tbl; } - ct_priv->post_ct = mlx5_chains_create_global_table(chains); - if (IS_ERR(ct_priv->post_ct)) { - err = PTR_ERR(ct_priv->post_ct); - mlx5_core_warn(dev, - "%s, failed to create post ct table err: %d\n", - INIT_ERR_PREFIX, err); - goto err_post_ct_tbl; - } - - idr_init(&ct_priv->fte_ids); + ct_priv->post_act = post_act; mutex_init(&ct_priv->control_lock); rhashtable_init(&ct_priv->zone_ht, &zone_params); rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); @@ -2211,8 +2133,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, return ct_priv; -err_post_ct_tbl: - mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); err_ct_nat_tbl: mlx5_chains_destroy_global_table(chains, ct_priv->ct); err_ct_tbl: @@ -2237,7 +2157,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) chains = ct_priv->chains; - mlx5_chains_destroy_global_table(chains, ct_priv->post_ct); mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); mlx5_chains_destroy_global_table(chains, ct_priv->ct); mapping_destroy(ct_priv->zone_mapping); @@ -2247,7 +2166,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); rhashtable_destroy(&ct_priv->zone_ht); mutex_destroy(&ct_priv->control_lock); - idr_destroy(&ct_priv->fte_ids); kfree(ct_priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 644cf1641cde..363329f4aac6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -92,7 +92,8 @@ struct mlx5_ct_attr { struct mlx5_tc_ct_priv * mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, struct mod_hdr_tbl *mod_hdr, - enum mlx5_flow_namespace_type ns_type); + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act); void mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv); @@ -132,7 +133,8 @@ 
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, static inline struct mlx5_tc_ct_priv * mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, struct mod_hdr_tbl *mod_hdr, - enum mlx5_flow_namespace_type ns_type) + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) { return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 756f806401d7..48a203a9e7d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -60,6 +60,7 @@ struct mlx5e_neigh_update_table { struct mlx5_tc_ct_priv; struct mlx5e_rep_bond; struct mlx5e_tc_tun_encap; +struct mlx5e_post_act; struct mlx5_rep_uplink_priv { /* Filters DB - instantiated by the uplink representor and shared by @@ -88,8 +89,9 @@ struct mlx5_rep_uplink_priv { /* maps tun_enc_opts to a unique id*/ struct mapping_ctx *tunnel_enc_opts_mapping; + struct mlx5e_post_act *post_act; struct mlx5_tc_ct_priv *ct_priv; - struct mlx5_esw_psample *esw_psample; + struct mlx5e_tc_psample *tc_psample; /* support eswitch vports bonding */ struct mlx5e_rep_bond *bond; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9465a51b6e66..6603d9c823a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -47,6 +47,7 @@ #include #include #include "en.h" +#include "en/tc/post_act.h" #include "en_rep.h" #include "en/rep/tc.h" #include "en/rep/neigh.h" @@ -60,7 +61,7 @@ #include "en/mod_hdr.h" #include "en/tc_priv.h" #include "en/tc_tun_encap.h" -#include "esw/sample.h" +#include "en/tc/sample.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "lib/fs_chains.h" @@ -246,7 +247,7 @@ get_ct_priv(struct mlx5e_priv *priv) } #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) -static struct mlx5_esw_psample * +static struct mlx5e_tc_psample * 
get_sample_priv(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -257,7 +258,7 @@ get_sample_priv(struct mlx5e_priv *priv) uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &uplink_rpriv->uplink_priv; - return uplink_priv->esw_psample; + return uplink_priv->tc_psample; } return NULL; @@ -1147,7 +1148,8 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, mod_hdr_acts); #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) } else if (flow_flag_test(flow, SAMPLE)) { - rule = mlx5_esw_sample_offload(get_sample_priv(flow->priv), spec, attr); + rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr, + mlx5e_tc_get_flow_tun_id(flow)); #endif } else { rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); @@ -1186,7 +1188,7 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) if (flow_flag_test(flow, SAMPLE)) { - mlx5_esw_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); + mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); return; } #endif @@ -1550,6 +1552,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, else mlx5e_detach_mod_hdr(priv, flow); } + kfree(attr->sample_attr); kvfree(attr->parse_attr); kvfree(attr->esw_attr->rx_tun_attr); @@ -1559,7 +1562,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); - kfree(flow->attr->esw_attr->sample); kfree(flow->attr); } @@ -1624,17 +1626,22 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, } } -static int flow_has_tc_fwd_action(struct flow_cls_offload *f) +static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_action *flow_action = &rule->action; const struct flow_action_entry *act; int i; + if (chain) + return false; + flow_action_for_each(i, act, flow_action) { switch (act->id) 
{ case FLOW_ACTION_GOTO: return true; + case FLOW_ACTION_SAMPLE: + return true; default: continue; } @@ -1875,7 +1882,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return -EOPNOTSUPP; needs_mapping = !!flow->attr->chain; - sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f); + sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f); *match_inner = !needs_mapping; if ((needs_mapping || sets_mapping) && @@ -3716,13 +3723,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_sample_attr sample_attr = {}; const struct ip_tunnel_info *info = NULL; struct mlx5_flow_attr *attr = flow->attr; int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; struct mlx5_esw_flow_attr *esw_attr; - struct mlx5_sample_attr sample = {}; bool encap = false, decap = false; u32 action = attr->action; int err, i, if_count = 0; @@ -3993,10 +4000,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); return -EOPNOTSUPP; } - sample.rate = act->sample.rate; - sample.group_num = act->sample.psample_group->group_num; + sample_attr.rate = act->sample.rate; + sample_attr.group_num = act->sample.psample_group->group_num; if (act->sample.truncate) - sample.trunc_size = act->sample.trunc_size; + sample_attr.trunc_size = act->sample.trunc_size; flow_flag_set(flow, SAMPLE); break; default: @@ -4081,10 +4088,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, * no errors after parsing. 
*/ if (flow_flag_test(flow, SAMPLE)) { - esw_attr->sample = kzalloc(sizeof(*esw_attr->sample), GFP_KERNEL); - if (!esw_attr->sample) + attr->sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL); + if (!attr->sample_attr) return -ENOMEM; - *esw_attr->sample = sample; + *attr->sample_attr = sample_attr; } return 0; @@ -4682,7 +4689,7 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate, rate_mbps = max_t(u32, rate, 1); } - err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); + err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps); if (err) NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); @@ -4895,8 +4902,9 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) goto err_chains; } + tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, - MLX5_FLOW_NAMESPACE_KERNEL); + MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, @@ -4912,6 +4920,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) err_reg: mlx5_tc_ct_clean(tc->ct); + mlx5e_tc_post_act_destroy(tc->post_act); mlx5_chains_destroy(tc->chains); err_chains: mapping_destroy(chains_mapping); @@ -4950,6 +4959,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mutex_destroy(&tc->t_lock); mlx5_tc_ct_clean(tc->ct); + mlx5e_tc_post_act_destroy(tc->post_act); mapping_destroy(tc->mapping); mlx5_chains_destroy(tc->chains); } @@ -4970,13 +4980,16 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) priv = netdev_priv(rpriv->netdev); esw = priv->mdev->priv.eswitch; + uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw), + MLX5_FLOW_NAMESPACE_FDB); uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev), esw_chains(esw), &esw->offloads.mod_hdr, - MLX5_FLOW_NAMESPACE_FDB); + MLX5_FLOW_NAMESPACE_FDB, + uplink_priv->post_act); #if 
IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev)); + uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act); #endif mapping_id = mlx5_query_nic_system_image_guid(esw->dev); @@ -5022,11 +5035,12 @@ err_enc_opts_mapping: mapping_destroy(uplink_priv->tunnel_mapping); err_tun_mapping: #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - mlx5_esw_sample_cleanup(uplink_priv->esw_psample); + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); #endif mlx5_tc_ct_clean(uplink_priv->ct_priv); netdev_warn(priv->netdev, "Failed to initialize tc (eswitch), err: %d", err); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); return err; } @@ -5043,9 +5057,10 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) mapping_destroy(uplink_priv->tunnel_mapping); #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - mlx5_esw_sample_cleanup(uplink_priv->esw_psample); + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); #endif mlx5_tc_ct_clean(uplink_priv->ct_priv); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index f7cbeb0b66d2..1a4cd882f0fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -70,6 +70,7 @@ struct mlx5_flow_attr { struct mlx5_fc *counter; struct mlx5_modify_hdr *modify_hdr; struct mlx5_ct_attr ct_attr; + struct mlx5e_sample_attr *sample_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; u32 chain; u16 prio; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 1703384eca95..20af557ae30c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -91,9 +91,15 @@ int 
mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ if (err) goto reg_err; + err = devlink_rate_leaf_create(dl_port, vport); + if (err) + goto rate_err; + vport->dl_port = dl_port; return 0; +rate_err: + devlink_port_unregister(dl_port); reg_err: mlx5_esw_dl_port_free(dl_port); return err; @@ -109,6 +115,12 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo vport = mlx5_eswitch_get_vport(esw, vport_num); if (IS_ERR(vport)) return; + + if (vport->dl_port->devlink_rate) { + mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + devlink_rate_leaf_destroy(vport->dl_port); + } + devlink_port_unregister(vport->dl_port); mlx5_esw_dl_port_free(vport->dl_port); vport->dl_port = NULL; @@ -148,8 +160,16 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p if (err) return err; + err = devlink_rate_leaf_create(dl_port, vport); + if (err) + goto rate_err; + vport->dl_port = dl_port; return 0; + +rate_err: + devlink_port_unregister(dl_port); + return err; } void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) @@ -159,6 +179,12 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num vport = mlx5_eswitch_get_vport(esw, vport_num); if (IS_ERR(vport)) return; + + if (vport->dl_port->devlink_rate) { + mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + devlink_rate_leaf_destroy(vport->dl_port); + } + devlink_port_unregister(vport->dl_port); vport->dl_port = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h new file mode 100644 index 000000000000..458baf0c6415 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_ESW_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_ESW_TP_ + +#include +#include "eswitch.h" + +TRACE_EVENT(mlx5_esw_vport_qos_destroy, + TP_PROTO(const struct mlx5_vport *vport), + TP_ARGS(vport), + TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + __field(unsigned short, vport_id) + __field(unsigned int, tsar_ix) + ), + TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device)); + __entry->vport_id = vport->vport; + __entry->tsar_ix = vport->qos.esw_tsar_ix; + ), + TP_printk("(%s) vport=%hu tsar_ix=%u\n", + __get_str(devname), __entry->vport_id, __entry->tsar_ix + ) +); + +DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + __field(unsigned short, vport_id) + __field(unsigned int, tsar_ix) + __field(unsigned int, bw_share) + __field(unsigned int, max_rate) + __field(void *, group) + ), + TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device)); + __entry->vport_id = vport->vport; + __entry->tsar_ix = vport->qos.esw_tsar_ix; + __entry->bw_share = bw_share; + __entry->max_rate = max_rate; + __entry->group = vport->qos.group; + ), + TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n", + __get_str(devname), __entry->vport_id, __entry->tsar_ix, + __entry->bw_share, __entry->max_rate, __entry->group + ) +); + +DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate) + ); + +DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template, + TP_PROTO(const struct 
mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const void *, group) + __field(unsigned int, tsar_ix) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->group = group; + __entry->tsar_ix = tsar_ix; + ), + TP_printk("(%s) group=%p tsar_ix=%u\n", + __get_str(devname), __entry->group, __entry->tsar_ix + ) +); + +DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix) + ); + +DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix) + ); + +TRACE_EVENT(mlx5_esw_group_qos_config, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix, u32 bw_share, u32 max_rate), + TP_ARGS(dev, group, tsar_ix, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const void *, group) + __field(unsigned int, tsar_ix) + __field(unsigned int, bw_share) + __field(unsigned int, max_rate) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->group = group; + __entry->tsar_ix = tsar_ix; + __entry->bw_share = bw_share; + __entry->max_rate = max_rate; + ), + TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n", + __get_str(devname), __entry->group, __entry->tsar_ix, + __entry->bw_share, __entry->max_rate + ) +); +#endif /* _MLX5_ESW_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH esw/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE qos_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c 
b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index d9041b16611d..df277a6cddc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -11,6 +11,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" +#include "esw/qos.h" enum { LEGACY_VEPA_PRIO = 0, @@ -508,3 +509,22 @@ unlock: mutex_unlock(&esw->state_lock); return err; } + +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, + u32 max_rate, u32 min_rate) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_min_rate(esw, evport, min_rate, NULL); + if (!err) + err = mlx5_esw_qos_set_vport_max_rate(esw, evport, max_rate, NULL); + mutex_unlock(&esw->state_lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c new file mode 100644 index 000000000000..985e305179d1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -0,0 +1,869 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include "eswitch.h" +#include "esw/qos.h" +#include "en/port.h" +#define CREATE_TRACE_POINTS +#include "diag/qos_tracepoint.h" + +/* Minimum supported BW share value by the HW is 1 Mbit/sec */ +#define MLX5_MIN_BW_SHARE 1 + +#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ + min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit) + +struct mlx5_esw_rate_group { + u32 tsar_ix; + u32 max_rate; + u32 min_rate; + u32 bw_share; + struct list_head list; +}; + +static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, + u32 parent_ix, u32 tsar_ix, + u32 max_rate, u32 bw_share) +{ + u32 bitmask = 0; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + + return mlx5_modify_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + tsar_ix, + bitmask); +} + +static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, + u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + int err; + + err = esw_qos_tsar_config(dev, sched_ctx, + esw->qos.root_tsar_ix, group->tsar_ix, + max_rate, bw_share); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); + + trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate); + + return err; +} + +static int esw_qos_vport_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u32 max_rate, u32 bw_share, + struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + 
struct mlx5_esw_rate_group *group = vport->qos.group; + struct mlx5_core_dev *dev = esw->dev; + u32 parent_tsar_ix; + void *vport_elem; + int err; + + if (!vport->qos.enabled) + return -EIO; + + parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, + element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); + + err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix, + max_rate, bw_share); + if (err) { + esw_warn(esw->dev, + "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed"); + return err; + } + + trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate); + + return 0; +} + +static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + bool group_level) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_vport *evport; + u32 max_guarantee = 0; + unsigned long i; + + if (group_level) { + struct mlx5_esw_rate_group *group; + + list_for_each_entry(group, &esw->qos.groups, list) { + if (group->min_rate < max_guarantee) + continue; + max_guarantee = group->min_rate; + } + } else { + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || !evport->qos.enabled || + evport->qos.group != group || evport->qos.min_rate < max_guarantee) + continue; + max_guarantee = evport->qos.min_rate; + } + } + + if (max_guarantee) + return max_t(u32, max_guarantee / fw_max_bw_share, 1); + + /* If vports min rate divider is 0 but their group has bw_share configured, then + * need to set bw_share for vports to minimal value. 
+ */ + /* A vport may have no parent group (group == NULL); check before reading bw_share. */ + if (!group_level && !max_guarantee && group && group->bw_share) + return 1; + return 0; +} + +static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) +{ + if (divider) + return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max); + + return 0; +} + +static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); + struct mlx5_vport *evport; + unsigned long i; + u32 bw_share; + int err; + + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group) + continue; + bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); + + if (bw_share == evport->qos.bw_share) + continue; + + err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack); + if (err) + return err; + + evport->qos.bw_share = bw_share; + } + + return 0; +} + +static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_esw_rate_group *group; + u32 bw_share; + int err; + + list_for_each_entry(group, &esw->qos.groups, list) { + bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); + + if (bw_share == group->bw_share) + continue; + + err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack); + if (err) + return err; + + group->bw_share = bw_share; + + /* All the group's vports need to be set with default bw_share + * to enable them with QOS + */ + err = esw_qos_normalize_vports_min_rate(esw, group, extack); + + if (err) + return err; + } + + return 0; +} + +int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, + u32 min_rate, + struct netlink_ext_ack *extack) +{ +
u32 fw_max_bw_share, previous_min_rate; + bool min_rate_supported; + int err; + + lockdep_assert_held(&esw->state_lock); + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + if (min_rate && !min_rate_supported) + return -EOPNOTSUPP; + if (min_rate == evport->qos.min_rate) + return 0; + + previous_min_rate = evport->qos.min_rate; + evport->qos.min_rate = min_rate; + err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack); + if (err) + evport->qos.min_rate = previous_min_rate; + + return err; +} + +int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, + u32 max_rate, + struct netlink_ext_ack *extack) +{ + u32 act_max_rate = max_rate; + bool max_rate_supported; + int err; + + lockdep_assert_held(&esw->state_lock); + max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + + if (max_rate && !max_rate_supported) + return -EOPNOTSUPP; + if (max_rate == evport->qos.max_rate) + return 0; + + /* If parent group has rate limit need to set to group + * value when new max rate is 0. 
+ */ + if (evport->qos.group && !max_rate) + act_max_rate = evport->qos.group->max_rate; + + err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack); + + if (!err) + evport->qos.max_rate = max_rate; + + return err; +} + +static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, + u32 min_rate, struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_core_dev *dev = esw->dev; + u32 previous_min_rate, divider; + int err; + + if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE)) + return -EOPNOTSUPP; + + if (min_rate == group->min_rate) + return 0; + + previous_min_rate = group->min_rate; + group->min_rate = min_rate; + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) { + group->min_rate = previous_min_rate; + NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); + + /* Attempt restoring previous configuration */ + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + if (esw_qos_normalize_groups_min_rate(esw, divider, extack)) + NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); + } + + return err; +} + +static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + u32 max_rate, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + if (group->max_rate == max_rate) + return 0; + + err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack); + if (err) + return err; + + group->max_rate = max_rate; + + /* Any unlimited vports in the group should be set + * with the value of the group. 
+ */ + mlx5_esw_for_each_vport(esw, i, vport) { + if (!vport->enabled || !vport->qos.enabled || + vport->qos.group != group || vport->qos.max_rate) + continue; + + err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "E-Switch vport implicit rate limit setting failed"); + } + + return err; +} + +static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u32 max_rate, u32 bw_share) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group = vport->qos.group; + struct mlx5_core_dev *dev = esw->dev; + u32 parent_tsar_ix; + void *vport_elem; + int err; + + parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + &vport->qos.esw_tsar_ix); + if (err) { + esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + return err; + } + + return 0; +} + +static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *curr_group, + struct mlx5_esw_rate_group *new_group, + struct netlink_ext_ack *extack) +{ + u32 max_rate; + int err; + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed"); + return err; + } + + 
vport->qos.group = new_group; + max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; + + /* If vport is unlimited, we set the group's value. + * Therefore, if the group is limited it will apply to + * the vport as well and if not, vport will remain unlimited. + */ + err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); + goto err_sched; + } + + return 0; + +err_sched: + vport->qos.group = curr_group; + max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate; + if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) + esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", + vport->vport); + + return err; +} + +static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *new_group, *curr_group; + int err; + + if (!vport->enabled) + return -EINVAL; + + curr_group = vport->qos.group; + new_group = group ?: esw->qos.group0; + if (curr_group == new_group) + return 0; + + err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack); + if (err) + return err; + + /* Recalculate bw share weights of old and new groups */ + if (vport->qos.bw_share) { + esw_qos_normalize_vports_min_rate(esw, curr_group, extack); + esw_qos_normalize_vports_min_rate(esw, new_group, extack); + } + + return 0; +} + +static struct mlx5_esw_rate_group * +esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group; + u32 divider; + int err; + + if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) + return ERR_PTR(-EOPNOTSUPP); + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + 
MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, + esw->qos.root_tsar_ix); + err = mlx5_create_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &group->tsar_ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); + goto err_sched_elem; + } + + list_add_tail(&group->list, &esw->qos.groups); + + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + if (divider) { + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); + goto err_min_rate; + } + } + trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); + + return group; + +err_min_rate: + list_del(&group->list); + /* Keep the original failure in err: overwriting it with a successful + * destroy status would make this function return ERR_PTR(0), i.e. NULL. + */ + if (mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix)) + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); +err_sched_elem: + kfree(group); + return ERR_PTR(err); +} + +static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + u32 divider; + int err; + + list_del(&group->list); + + divider = esw_qos_calculate_min_rate_divider(esw, NULL, true); + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed"); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); + + trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); + kfree(group); + return err; +} + +static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) +{ + switch (type) { + case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_TASR; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: + return MLX5_CAP_QOS(dev,
esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT_TC; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; + } + return false; +} + +void mlx5_esw_qos_create(struct mlx5_eswitch *esw) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + __be32 *attr; + int err; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return; + + if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) + return; + + mutex_lock(&esw->state_lock); + if (esw->qos.enabled) + goto unlock; + + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &esw->qos.root_tsar_ix); + if (err) { + esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); + goto unlock; + } + + INIT_LIST_HEAD(&esw->qos.groups); + if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { + esw->qos.group0 = esw_qos_create_rate_group(esw, NULL); + if (IS_ERR(esw->qos.group0)) { + esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", + PTR_ERR(esw->qos.group0)); + goto err_group0; + } + } + esw->qos.enabled = true; +unlock: + mutex_unlock(&esw->state_lock); + return; + +err_group0: + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); + mutex_unlock(&esw->state_lock); +} + +void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw) +{ + struct devlink *devlink = priv_to_devlink(esw->dev); + int err; + + 
devlink_rate_nodes_destroy(devlink); + mutex_lock(&esw->state_lock); + if (!esw->qos.enabled) + goto unlock; + + if (esw->qos.group0) + esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); + + esw->qos.enabled = false; +unlock: + mutex_unlock(&esw->state_lock); +} + +int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 bw_share) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + if (!esw->qos.enabled) + return 0; + + if (vport->qos.enabled) + return -EEXIST; + + vport->qos.group = esw->qos.group0; + + err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); + if (!err) { + vport->qos.enabled = true; + trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); + } + + return err; +} + +void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + if (!esw->qos.enabled || !vport->qos.enabled) + return; + WARN(vport->qos.group && vport->qos.group != esw->qos.group0, + "Disabling QoS on port before detaching it from group"); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + + vport->qos.enabled = false; + trace_mlx5_esw_vport_qos_destroy(vport); +} + +int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) +{ + u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_vport *vport; + u32 bitmask; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + if (!vport->qos.enabled) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, ctx, max_average_bw, 
rate_mbps); + bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + + return mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + bitmask); +} + +#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ + +/* Converts the bytes-per-second value pointed to by @rate into megabits per + * second, rewriting *rate in place. Returns an error if the value is not a + * whole multiple of 1 Mbps or if the converted rate exceeds the maximum link speed. + */ +static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, + u64 *rate, struct netlink_ext_ack *extack) +{ + u32 link_speed_max, reminder; + u64 value; + int err; + + err = mlx5e_port_max_linkspeed(mdev, &link_speed_max); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); + return err; + } + + value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder); + if (reminder) { + pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", + name, *rate); + NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); + return -EINVAL; + } + + if (value > link_speed_max) { + pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n", + name, value, link_speed_max); + NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed"); + return -EINVAL; + } + + *rate = value; + return 0; +} + +/* Eswitch devlink rate API */ + +int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + int err; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int
mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + int err; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_esw_rate_group *group = priv; + int err; + + err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_set_group_min_rate(esw, group, tx_share, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_esw_rate_group *group = priv; + int err; + + err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_set_group_max_rate(esw, group, tx_max, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + struct mlx5_eswitch *esw; + int err = 0; + + esw = mlx5_devlink_eswitch_get(rate_node->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + mutex_lock(&esw->state_lock); + if 
(esw->mode != MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "Rate node creation supported only in switchdev mode"); + err = -EOPNOTSUPP; + goto unlock; + } + + group = esw_qos_create_rate_group(esw, extack); + if (IS_ERR(group)) { + err = PTR_ERR(group); + goto unlock; + } + + *priv = group; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group = priv; + struct mlx5_eswitch *esw; + int err; + + esw = mlx5_devlink_eswitch_get(rate_node->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + mutex_lock(&esw->state_lock); + err = esw_qos_destroy_rate_group(esw, group, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + int err; + + mutex_lock(&esw->state_lock); + err = esw_qos_vport_update_group(esw, vport, group, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + struct mlx5_vport *vport = priv; + + if (!parent) + return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, + vport, NULL, extack); + + group = parent_priv; + return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h new file mode 100644 index 000000000000..28451abe2d2f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_ESW_QOS_H__ +#define __MLX5_ESW_QOS_H__ + +#ifdef CONFIG_MLX5_ESWITCH + +int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, + u32 min_rate, + struct netlink_ext_ack *extack); +int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, + u32 max_rate, + struct netlink_ext_ack *extack); +void mlx5_esw_qos_create(struct mlx5_eswitch *esw); +void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw); +int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 bw_share); +void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack); +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h deleted file mode 100644 index 2a3f4be10030..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 
2021 Mellanox Technologies. */ - -#ifndef __MLX5_EN_TC_SAMPLE_H__ -#define __MLX5_EN_TC_SAMPLE_H__ - -#include "en.h" -#include "eswitch.h" - -struct mlx5e_priv; -struct mlx5_flow_attr; -struct mlx5_esw_psample; - -struct mlx5_sample_attr { - u32 group_num; - u32 rate; - u32 trunc_size; - u32 restore_obj_id; - u32 sampler_id; - struct mlx5_flow_table *sample_default_tbl; - struct mlx5_sample_flow *sample_flow; -}; - -void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj); - -struct mlx5_flow_handle * -mlx5_esw_sample_offload(struct mlx5_esw_psample *sample_priv, - struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr); - -void -mlx5_esw_sample_unoffload(struct mlx5_esw_psample *sample_priv, - struct mlx5_flow_handle *rule, - struct mlx5_flow_attr *attr); - -struct mlx5_esw_psample * -mlx5_esw_sample_init(struct mlx5e_priv *priv); - -void -mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample); - -#endif /* __MLX5_EN_TC_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2fde9f59e8b4..ec136b499204 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -38,6 +38,7 @@ #include #include "esw/acl/lgcy.h" #include "esw/legacy.h" +#include "esw/qos.h" #include "mlx5_core.h" #include "lib/eq.h" #include "eswitch.h" @@ -740,201 +741,6 @@ static void esw_vport_change_handler(struct work_struct *work) mutex_unlock(&esw->state_lock); } -static bool element_type_supported(struct mlx5_eswitch *esw, int type) -{ - const struct mlx5_core_dev *dev = esw->dev; - - switch (type) { - case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_TASR; - case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_VPORT; - case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: - return MLX5_CAP_QOS(dev, 
esw_element_type) & - ELEMENT_TYPE_CAP_MASK_VPORT_TC; - case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; - } - return false; -} - -/* Vport QoS management */ -static void esw_create_tsar(struct mlx5_eswitch *esw) -{ - u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_core_dev *dev = esw->dev; - __be32 *attr; - int err; - - if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) - return; - - if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) - return; - - if (esw->qos.enabled) - return; - - MLX5_SET(scheduling_context, tsar_ctx, element_type, - SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); - - attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); - *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); - - err = mlx5_create_scheduling_element_cmd(dev, - SCHEDULING_HIERARCHY_E_SWITCH, - tsar_ctx, - &esw->qos.root_tsar_id); - if (err) { - esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err); - return; - } - - esw->qos.enabled = true; -} - -static void esw_destroy_tsar(struct mlx5_eswitch *esw) -{ - int err; - - if (!esw->qos.enabled) - return; - - err = mlx5_destroy_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - esw->qos.root_tsar_id); - if (err) - esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err); - - esw->qos.enabled = false; -} - -static int esw_vport_enable_qos(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, - u32 initial_max_rate, u32 initial_bw_share) -{ - u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_core_dev *dev = esw->dev; - void *vport_elem; - int err = 0; - - if (!esw->qos.enabled) - return 0; - - if (vport->qos.enabled) - return -EEXIST; - - MLX5_SET(scheduling_context, sched_ctx, element_type, - SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); - vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, - element_attributes); - 
MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); - MLX5_SET(scheduling_context, sched_ctx, parent_element_id, - esw->qos.root_tsar_id); - MLX5_SET(scheduling_context, sched_ctx, max_average_bw, - initial_max_rate); - MLX5_SET(scheduling_context, sched_ctx, bw_share, initial_bw_share); - - err = mlx5_create_scheduling_element_cmd(dev, - SCHEDULING_HIERARCHY_E_SWITCH, - sched_ctx, - &vport->qos.esw_tsar_ix); - if (err) { - esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", - vport->vport, err); - return err; - } - - vport->qos.enabled = true; - return 0; -} - -static void esw_vport_disable_qos(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - int err; - - if (!vport->qos.enabled) - return; - - err = mlx5_destroy_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - vport->qos.esw_tsar_ix); - if (err) - esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", - vport->vport, err); - - vport->qos.enabled = false; -} - -static int esw_vport_qos_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, - u32 max_rate, u32 bw_share) -{ - u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_core_dev *dev = esw->dev; - void *vport_elem; - u32 bitmask = 0; - int err = 0; - - if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) - return -EOPNOTSUPP; - - if (!vport->qos.enabled) - return -EIO; - - MLX5_SET(scheduling_context, sched_ctx, element_type, - SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); - vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, - element_attributes); - MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); - MLX5_SET(scheduling_context, sched_ctx, parent_element_id, - esw->qos.root_tsar_id); - MLX5_SET(scheduling_context, sched_ctx, max_average_bw, - max_rate); - MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); - bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; - bitmask |= 
MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; - - err = mlx5_modify_scheduling_element_cmd(dev, - SCHEDULING_HIERARCHY_E_SWITCH, - sched_ctx, - vport->qos.esw_tsar_ix, - bitmask); - if (err) { - esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", - vport->vport, err); - return err; - } - - return 0; -} - -int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, - u32 rate_mbps) -{ - u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; - struct mlx5_vport *vport; - - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return PTR_ERR(vport); - - if (!vport->qos.enabled) - return -EOPNOTSUPP; - - MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); - - return mlx5_modify_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - ctx, - vport->qos.esw_tsar_ix, - MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW); -} - static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac) { ((u8 *)node_guid)[7] = mac[0]; @@ -976,7 +782,7 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) return err; /* Attach vport to the eswitch rate limiter */ - esw_vport_enable_qos(esw, vport, vport->qos.max_rate, vport->qos.bw_share); + mlx5_esw_qos_vport_enable(esw, vport, vport->qos.max_rate, vport->qos.bw_share); if (mlx5_esw_is_manager_vport(esw, vport_num)) return 0; @@ -1013,7 +819,7 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); - esw_vport_disable_qos(esw, vport); + mlx5_esw_qos_vport_disable(esw, vport); esw_vport_cleanup_acl(esw, vport); } @@ -1454,7 +1260,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) mlx5_eswitch_update_num_of_vfs(esw, num_vfs); - esw_create_tsar(esw); + mlx5_esw_qos_create(esw); esw->mode = mode; @@ -1484,7 +1290,7 @@ abort: if (mode == MLX5_ESWITCH_OFFLOADS) mlx5_rescan_drivers(esw->dev); - esw_destroy_tsar(esw); + 
mlx5_esw_qos_destroy(esw); mlx5_esw_acls_ns_cleanup(esw); return err; } @@ -1553,7 +1359,7 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) if (old_mode == MLX5_ESWITCH_OFFLOADS) mlx5_rescan_drivers(esw->dev); - esw_destroy_tsar(esw); + mlx5_esw_qos_destroy(esw); mlx5_esw_acls_ns_cleanup(esw); if (clear_vf) @@ -2050,110 +1856,6 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, return err; } -static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) -{ - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - struct mlx5_vport *evport; - u32 max_guarantee = 0; - unsigned long i; - - mlx5_esw_for_each_vport(esw, i, evport) { - if (!evport->enabled || evport->qos.min_rate < max_guarantee) - continue; - max_guarantee = evport->qos.min_rate; - } - - if (max_guarantee) - return max_t(u32, max_guarantee / fw_max_bw_share, 1); - return 0; -} - -static int normalize_vports_min_rate(struct mlx5_eswitch *esw) -{ - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - u32 divider = calculate_vports_min_rate_divider(esw); - struct mlx5_vport *evport; - u32 vport_max_rate; - u32 vport_min_rate; - unsigned long i; - u32 bw_share; - int err; - - mlx5_esw_for_each_vport(esw, i, evport) { - if (!evport->enabled) - continue; - vport_min_rate = evport->qos.min_rate; - vport_max_rate = evport->qos.max_rate; - bw_share = 0; - - if (divider) - bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate, - divider, - fw_max_bw_share); - - if (bw_share == evport->qos.bw_share) - continue; - - err = esw_vport_qos_config(esw, evport, vport_max_rate, - bw_share); - if (!err) - evport->qos.bw_share = bw_share; - else - return err; - } - - return 0; -} - -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, - u32 max_rate, u32 min_rate) -{ - struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); - u32 fw_max_bw_share; - u32 previous_min_rate; - bool min_rate_supported; - bool max_rate_supported; - int err 
= 0; - - if (!mlx5_esw_allowed(esw)) - return -EPERM; - if (IS_ERR(evport)) - return PTR_ERR(evport); - - fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && - fw_max_bw_share >= MLX5_MIN_BW_SHARE; - max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); - - if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported)) - return -EOPNOTSUPP; - - mutex_lock(&esw->state_lock); - - if (min_rate == evport->qos.min_rate) - goto set_max_rate; - - previous_min_rate = evport->qos.min_rate; - evport->qos.min_rate = min_rate; - err = normalize_vports_min_rate(esw); - if (err) { - evport->qos.min_rate = previous_min_rate; - goto unlock; - } - -set_max_rate: - if (max_rate == evport->qos.max_rate) - goto unlock; - - err = esw_vport_qos_config(esw, evport, max_rate, evport->qos.bw_share); - if (!err) - evport->qos.max_rate = max_rate; - -unlock: - mutex_unlock(&esw->state_lock); - return err; -} - int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, u16 vport_num, struct ifla_vf_stats *vf_stats) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d3a5ff4f6140..2c7444101bb9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -46,7 +46,7 @@ #include "lib/fs_chains.h" #include "sf/sf.h" #include "en/tc_ct.h" -#include "esw/sample.h" +#include "en/tc/sample.h" enum mlx5_mapped_obj_type { MLX5_MAPPED_OBJ_CHAIN, @@ -61,6 +61,7 @@ struct mlx5_mapped_obj { u32 group_id; u32 rate; u32 trunc_size; + u32 tunnel_id; } sample; }; }; @@ -75,11 +76,6 @@ struct mlx5_mapped_obj { #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define MLX5_MIN_BW_SHARE 1 - -#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ - min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit) - #define mlx5_esw_has_fwd_fdb(dev) \ 
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) @@ -181,6 +177,7 @@ struct mlx5_vport { u32 bw_share; u32 min_rate; u32 max_rate; + struct mlx5_esw_rate_group *group; } qos; u16 vport; @@ -309,7 +306,9 @@ struct mlx5_eswitch { struct { bool enabled; - u32 root_tsar_id; + u32 root_tsar_ix; + struct mlx5_esw_rate_group *group0; + struct list_head groups; /* Protected by esw->state_lock */ } qos; struct mlx5_esw_bridge_offloads *br_offloads; @@ -335,8 +334,7 @@ int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); -int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, - u32 rate_mbps); +int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); @@ -359,6 +357,10 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, u16 vport_num, bool setting); int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, u32 max_rate, u32 min_rate); +int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack); int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, @@ -469,7 +471,6 @@ struct mlx5_esw_flow_attr { } dests[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5_rx_tun_attr *rx_tun_attr; struct mlx5_pkt_reformat *decap_pkt_reformat; - struct mlx5_sample_attr *sample; }; int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 49c7bf94332c..0d461e38add3 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -187,12 +187,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw, static int esw_setup_sampler_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, - struct mlx5_esw_flow_attr *esw_attr, + struct mlx5_flow_attr *attr, int i) { flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; - dest[i].sampler_id = esw_attr->sample->sampler_id; + dest[i].sampler_id = attr->sample_attr->sampler_id; return 0; } @@ -435,7 +435,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest, attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE; if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) { - esw_setup_sampler_dest(dest, flow_act, esw_attr, *i); + esw_setup_sampler_dest(dest, flow_act, attr, *i); (*i)++; } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); @@ -540,10 +540,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_hdr = attr->modify_hdr; - /* esw_attr->sample is allocated only when there is a sample action */ - if (esw_attr->sample && esw_attr->sample->sample_default_tbl) { - fdb = esw_attr->sample->sample_default_tbl; - } else if (split) { + if (split) { fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fce3cbae0b99..f3638d09ba77 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -865,7 +865,8 @@ struct mlx5_ifc_qos_cap_bits { u8 nic_bw_share[0x1]; u8 nic_rate_limit[0x1]; u8 packet_pacing_uid[0x1]; - u8 reserved_at_c[0x14]; + u8 log_esw_max_sched_depth[0x4]; + u8 reserved_at_10[0x10]; u8 reserved_at_20[0xb]; u8 log_max_qos_nic_queue_group[0x5];