From 6cc070bdf07c8f6d5955d43da0560c9e5fd203b1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 May 2019 15:28:39 +0300 Subject: [PATCH 01/16] net/mlx5: potential error pointer dereference in error handling The error handling was a bit flipped around. If the mlx5_create_flow_group() function failed then it would have resulted in dereferencing "fg" when it was an error pointer. Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Dan Carpenter Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 86f77456f873..401441aefbcb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -106,10 +106,10 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) return 0; -destroy_flow_table: - mlx5_destroy_flow_table(ft); destroy_flow_group: mlx5_destroy_flow_group(fg); +destroy_flow_table: + mlx5_destroy_flow_table(ft); free: kvfree(spec); kvfree(flow_group_in); From 0b9055a112fd86c07b9d4857b61019485ec6526f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 29 May 2019 22:50:24 +0000 Subject: [PATCH 02/16] net/mlx5: Add core dump register access HW bits Add Firmware core dump registers and HW definitions. Signed-off-by: Moshe Shemesh Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5a27246db883..b5431f7d97cb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -107,6 +107,7 @@ enum { MLX5_REG_FPGA_CAP = 0x4022, MLX5_REG_FPGA_CTRL = 0x4023, MLX5_REG_FPGA_ACCESS_REG = 0x4024, + MLX5_REG_CORE_DUMP = 0x402e, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5e74305e2e57..7ee422e38826 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -715,7 +715,9 @@ struct mlx5_ifc_qos_cap_bits { }; struct mlx5_ifc_debug_cap_bits { - u8 reserved_at_0[0x20]; + u8 core_dump_general[0x1]; + u8 core_dump_qp[0x1]; + u8 reserved_at_2[0x1e]; u8 reserved_at_20[0x2]; u8 stall_detect[0x1]; @@ -2531,6 +2533,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; struct mlx5_ifc_qos_cap_bits qos_cap; + struct mlx5_ifc_debug_cap_bits debug_cap; struct mlx5_ifc_fpga_cap_bits fpga_cap; u8 reserved_at_0[0x8000]; }; @@ -8546,6 +8549,18 @@ struct mlx5_ifc_qcam_reg_bits { u8 reserved_at_1c0[0x80]; }; +struct mlx5_ifc_core_dump_reg_bits { + u8 reserved_at_0[0x18]; + u8 core_dump_type[0x8]; + + u8 reserved_at_20[0x30]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x8]; + u8 qpn[0x18]; + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_pcap_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; From c6d4e45d3b44b71227588c2f76615380b3961f96 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 29 May 2019 22:50:29 +0000 Subject: [PATCH 03/16] net/mlx5: Introduce termination table bits Termination table is a flow table with a termination flag. The flag allows the firmware to assume that the the specified actions are the last actions list. This assumption allows the FW to safely perform potential looping logic (e.g. hairpin). Introduce the bits for this attribute. Signed-off-by: Eli Britstein Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 +++ include/linux/mlx5/fs.h | 1 + include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 013b1ca4a791..bb24c3797218 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -147,6 +147,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, { int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; struct mlx5_core_dev *dev = ns->dev; @@ -167,6 +168,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, en_decap); MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, en_encap); + MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table, + term); switch (ft->op_mod) { case FS_FT_OP_MOD_NORMAL: diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index e690ba0f965c..2ddaa97f2179 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -47,6 +47,7 @@ enum { enum { MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0), MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), + MLX5_FLOW_TABLE_TERMINATION = BIT(2), }; #define LEFTOVERS_RULE_NUM 2 diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7ee422e38826..feaa909bf14f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -382,7 +382,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reformat_and_modify_action[0x1]; u8 reserved_at_15[0x2]; u8 table_miss_action_domain[0x1]; - u8 reserved_at_18[0x8]; + u8 termination_table[0x1]; + u8 reserved_at_19[0x7]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; @@ -7239,7 +7240,8 @@ struct mlx5_ifc_create_flow_table_out_bits { struct mlx5_ifc_flow_table_context_bits { u8 reformat_en[0x1]; u8 decap_en[0x1]; - u8 reserved_at_2[0x2]; + u8 reserved_at_2[0x1]; + u8 termination_table[0x1]; u8 table_miss_action[0x4]; u8 level[0x8]; u8 reserved_at_10[0x8]; From cd56f929e6a547180f889a4def370bdd6d48d223 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Wed, 29 May 2019 22:50:34 +0000 Subject: [PATCH 04/16] net/mlx5: E-Switch, Replace host_params event with functions_changed event To support sriov on a E-Switch manager, num_vfs are queried to the firmware whenever E-Switch manager is notified by esw_functions_changed event. Replace host_params event with esw_functions_changed event that reflects more appropriate naming. While at it, also correct num_vfs type from int to u16 as expected by the function mlx5_esw_query_functions(). Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +- .../net/ethernet/mellanox/mlx5/core/ecpf.c | 27 -------- .../net/ethernet/mellanox/mlx5/core/ecpf.h | 4 -- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 +- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 32 ++++++++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 6 +- .../mellanox/mlx5/core/eswitch_offloads.c | 69 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/events.c | 4 +- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/mlx5_ifc.h | 6 +- 10 files changed, 86 insertions(+), 71 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 937ba4bcb056..7d3aec98e31f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,7 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: - case MLX5_CMD_OP_QUERY_HOST_PARAMS: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -628,7 +628,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); - MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 4746f2d28fb6..1bcf8b8f9713 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -83,30 +83,3 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_peer_pf_cleanup(dev); } - -static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, - u32 *out, int outlen) -{ - u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; - - MLX5_SET(query_host_params_in, in, opcode, - MLX5_CMD_OP_QUERY_HOST_PARAMS); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ - u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; - int err; - - err = mlx5_query_host_params_context(dev, out, sizeof(out)); - if (err) - return err; - - *num_vf = MLX5_GET(query_host_params_out, out, - host_params_context.host_num_of_vfs); - mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); - - return 0; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index 346372df218f..d3d7a00a02ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -16,7 +16,6 @@ enum { bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); int mlx5_ec_init(struct mlx5_core_dev *dev); void mlx5_ec_cleanup(struct mlx5_core_dev *dev); -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); #else /* CONFIG_MLX5_ESWITCH */ @@ -24,9 +23,6 @@ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} -static inline int -mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 23883d1fa22f..052bd70e4aa6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -534,7 +534,8 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); if (mlx5_core_is_ecpf_esw_manager(dev)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + async_event_mask |= + (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); return async_event_mask; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9ea0ccfe5ef5..d8935232964a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1686,13 +1686,41 @@ static int eswitch_vport_event(struct notifier_block *nb, return NOTIFY_OK; } +static int query_esw_functions(struct mlx5_core_dev *dev, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {0}; + + MLX5_SET(query_esw_functions_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {0}; + int err; + + err = query_esw_functions(dev, out, sizeof(out)); + if (err) + return err; + + *num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + esw_debug(dev, "host_num_of_vfs=%d\n", *num_vfs); + + return 0; +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { - int vf_nvports = 0, total_nvports = 0; struct mlx5_vport *vport; + int total_nvports = 0; + u16 vf_nvports = 0; int err; int i, enabled_events; @@ -1712,7 +1740,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (mode == SRIOV_OFFLOADS) { if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); + err = mlx5_esw_query_functions(esw->dev, &vf_nvports); if (err) return err; total_nvports = esw->total_vports; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ed3fad689ec9..320dd83dd301 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -190,7 +190,7 @@ struct mlx5_host_work { struct mlx5_eswitch *esw; }; -struct mlx5_host_info { +struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; }; @@ -219,7 +219,7 @@ struct mlx5_eswitch { int mode; int nvports; u16 manager_vport; - struct mlx5_host_info host_info; + struct mlx5_esw_functions esw_funcs; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw); @@ -386,6 +386,8 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(__dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e09ae27485ee..83689678b400 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -41,7 +41,6 @@ #include "en.h" #include "fs_core.h" #include "lib/devcom.h" -#include "ecpf.h" #include "lib/eq.h" /* There are two match-all miss flows, one for unicast dst mac and @@ -1782,57 +1781,79 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_prio_tag_acls_cleanup(esw); } -static void esw_host_params_event_handler(struct work_struct *work) +static void esw_functions_changed_event_handler(struct work_struct *work) { struct mlx5_host_work *host_work; struct mlx5_eswitch *esw; - int err, num_vf = 0; + u16 num_vfs = 0; + int err; host_work = container_of(work, struct mlx5_host_work, work); esw = host_work->esw; - err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); - if (err || num_vf == esw->host_info.num_vfs) + err = mlx5_esw_query_functions(esw->dev, &num_vfs); + if (err || num_vfs == esw->esw_funcs.num_vfs) goto out; /* Number of VFs can only change from "0 to x" or "x to 0". */ - if (esw->host_info.num_vfs > 0) { - esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + if (esw->esw_funcs.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs); } else { - err = esw_offloads_load_vf_reps(esw, num_vf); + err = esw_offloads_load_vf_reps(esw, num_vfs); if (err) goto out; } - esw->host_info.num_vfs = num_vf; + esw->esw_funcs.num_vfs = num_vfs; out: kfree(host_work); } -static int esw_host_params_event(struct notifier_block *nb, - unsigned long type, void *data) +static int esw_functions_changed_event(struct notifier_block *nb, + unsigned long type, void *data) { + struct mlx5_esw_functions *esw_funcs; struct mlx5_host_work *host_work; - struct mlx5_host_info *host_info; struct mlx5_eswitch *esw; host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); if (!host_work) return NOTIFY_DONE; - host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); - esw = container_of(host_info, struct mlx5_eswitch, host_info); + esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb); + esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); host_work->esw = esw; - INIT_WORK(&host_work->work, esw_host_params_event_handler); + INIT_WORK(&host_work->work, esw_functions_changed_event_handler); queue_work(esw->work_queue, &host_work->work); return NOTIFY_OK; } +static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, + u16 vf_nvports) +{ + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return; + + MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + esw->esw_funcs.num_vfs = vf_nvports; +} + +static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw) +{ + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return; + + mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); + flush_workqueue(esw->work_queue); +} + int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, int total_nvports) { @@ -1848,12 +1869,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, esw_offloads_devcom_init(esw); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, - HOST_PARAMS_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); - esw->host_info.num_vfs = vf_nvports; - } + esw_functions_changed_event_init(esw, vf_nvports); mlx5_rdma_enable_roce(esw->dev); @@ -1887,13 +1903,12 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) { u16 num_vfs; - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); - flush_workqueue(esw->work_queue); - num_vfs = esw->host_info.num_vfs; - } else { + esw_functions_changed_event_cleanup(esw); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + num_vfs = esw->esw_funcs.num_vfs; + else num_vfs = esw->dev->priv.sriov.num_vfs; - } mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index a81e8d2168d8..8bcf3426b9c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -108,8 +108,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: - return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; + case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: + return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index fc2b6e807f06..5e760067ac41 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -342,7 +342,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, - MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE = 0xe, + MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED = 0xe, MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index feaa909bf14f..0780242a757a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -155,7 +155,7 @@ enum { MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, - MLX5_CMD_OP_QUERY_HOST_PARAMS = 0x740, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -9721,7 +9721,7 @@ struct mlx5_ifc_host_params_context_bits { u8 reserved_at_80[0x180]; }; -struct mlx5_ifc_query_host_params_in_bits { +struct mlx5_ifc_query_esw_functions_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -9731,7 +9731,7 @@ struct mlx5_ifc_query_host_params_in_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_query_host_params_out_bits { +struct mlx5_ifc_query_esw_functions_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; From 6706a3b94f890145ca09797f748d2b30e1414fd3 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Wed, 29 May 2019 22:50:37 +0000 Subject: [PATCH 05/16] net/mlx5: E-Switch, Honor eswitch functions changed event cap Whenever device supports eswitch functions changed event, honor such device setting. Do not limit it to ECPF. Signed-off-by: Parav Pandit Signed-off-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 13 +++++++++++++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 +++--- include/linux/mlx5/mlx5_ifc.h | 4 +++- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 052bd70e4aa6..5e9319d3d90c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -533,7 +533,7 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); - if (mlx5_core_is_ecpf_esw_manager(dev)) + if (mlx5_eswitch_is_funcs_handler(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 320dd83dd301..b524813cccac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -406,6 +406,18 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) +{ + /* Ideally device should have the functions changed supported + * capability regardless of it being ECPF or PF wherever such + * event should be processed such as on eswitch manager device. + * However, some ECPF based device might not have this capability + * set. Hence OR for ECPF check to cover such device. + */ + return MLX5_CAP_ESW(dev, esw_functions_changed) || + mlx5_core_is_ecpf_esw_manager(dev); +} + static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) { /* Uplink always locate at the last element of the array.*/ @@ -500,6 +512,7 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 83689678b400..05cb2fffd887 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1836,7 +1836,7 @@ static int esw_functions_changed_event(struct notifier_block *nb, static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, u16 vf_nvports) { - if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (!mlx5_eswitch_is_funcs_handler(esw->dev)) return; MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, @@ -1847,7 +1847,7 @@ static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw) { - if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (!mlx5_eswitch_is_funcs_handler(esw->dev)) return; mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); @@ -1905,7 +1905,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) esw_functions_changed_event_cleanup(esw); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (mlx5_eswitch_is_funcs_handler(esw->dev)) num_vfs = esw->esw_funcs.num_vfs; else num_vfs = esw->dev->priv.sriov.num_vfs; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0780242a757a..6513b985c5e9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -665,7 +665,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x16]; + u8 reserved_at_5[0x14]; + u8 esw_functions_changed[0x1]; + u8 reserved_at_1a[0x1]; u8 ecpf_vport_exists[0x1]; u8 counter_eswitch_affinity[0x1]; u8 merged_eswitch[0x1]; From c94ff7487754dd23159a8dc47466c0cc82121ebd Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 29 May 2019 22:50:39 +0000 Subject: [PATCH 06/16] {IB, net}/mlx5: No need to typecast from void* to mlx5_ib_dev* Avoid typecasting from void* to mlx5_ib_dev* or mlx5e_rep_priv* as it is not needed. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 1d9778da8a50..c995102b0276 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -72,6 +72,6 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, static inline struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) { - return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv; + return rep->rep_if[REP_IB].priv; } #endif /* __MLX5_IB_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 83b573b1abac..c40c025afd99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -91,7 +91,7 @@ struct mlx5e_rep_priv { static inline struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) { - return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv; + return rep->rep_if[REP_ETH].priv; } struct mlx5e_neigh { From 8693115af4c24d92b971ad895c5f329761ed5d38 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 29 May 2019 22:50:41 +0000 Subject: [PATCH 07/16] {IB,net}/mlx5: Constify rep ops functions pointers Currently for every representor type and for every single vport, representer function pointers copy is stored even though they don't change from one to other vport. Additionally priv data entry for the rep is not passed during registration, but its copied. It is used (set and cleared) by the user of the reps. As we want to scale vports, to simplify and also to split constants from data, 1. Rename mlx5_eswitch_rep_if to mlx5_eswitch_rep_ops as to match _ops prefix with other standard netdev, ibdev ops. 2. Constify the IB and Ethernet rep ops structure. 3. Instead of storing copy of all rep function pointers, store copy per eswitch rep type. 4. Split data and function pointers to mlx5_eswitch_rep_ops and mlx5_eswitch_rep_data. Signed-off-by: Parav Pandit Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 19 +++++----- drivers/infiniband/hw/mlx5/ib_rep.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 15 ++++---- .../net/ethernet/mellanox/mlx5/core/en_rep.h | 2 +- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../mellanox/mlx5/core/eswitch_offloads.c | 38 +++++++++---------- include/linux/mlx5/eswitch.h | 20 +++++----- 7 files changed, 49 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index cbcc40d776b9..22e651cb5534 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -60,7 +60,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) if (!__mlx5_ib_add(ibdev, profile)) return -EINVAL; - rep->rep_if[REP_IB].priv = ibdev; + rep->rep_data[REP_IB].priv = ibdev; return 0; } @@ -70,13 +70,13 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) { struct mlx5_ib_dev *dev; - if (!rep->rep_if[REP_IB].priv || + if (!rep->rep_data[REP_IB].priv || rep->vport != MLX5_VPORT_UPLINK) return; dev = mlx5_ib_rep_to_dev(rep); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); - rep->rep_if[REP_IB].priv = NULL; + rep->rep_data[REP_IB].priv = NULL; } static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) @@ -84,16 +84,17 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) return mlx5_ib_rep_to_dev(rep); } +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5_ib_vport_rep_load, + .unload = mlx5_ib_vport_rep_unload, + .get_proto_dev = mlx5_ib_vport_get_proto_dev, +}; + void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - rep_if.load = mlx5_ib_vport_rep_load; - rep_if.unload = mlx5_ib_vport_rep_unload; - rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; - - mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB); + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); } void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index c995102b0276..22adce2d6795 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -72,6 +72,6 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, static inline struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) { - return rep->rep_if[REP_IB].priv; + return rep->rep_data[REP_IB].priv; } #endif /* __MLX5_IB_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 91e24f1cead8..33f8f99681a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1752,7 +1752,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } rpriv->netdev = netdev; - rep->rep_if[REP_ETH].priv = rpriv; + rep->rep_data[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); if (rep->vport == MLX5_VPORT_UPLINK) { @@ -1826,16 +1826,17 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5e_vport_rep_load, + .unload = mlx5e_vport_rep_unload, + .get_proto_dev = mlx5e_vport_rep_get_proto_dev +}; + void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - - mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index c40c025afd99..e34573fd88c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -91,7 +91,7 @@ struct mlx5e_rep_priv { static inline struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) { - return rep->rep_if[REP_ETH].priv; + return rep->rep_data[REP_ETH].priv; } struct mlx5e_neigh { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b524813cccac..135d9a29bbdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -173,6 +173,7 @@ struct mlx5_esw_offload { struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); + const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; u8 encap; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 05cb2fffd887..d6246ee042fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -332,7 +332,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { rep = &esw->offloads.vport_reps[vf_vport]; - if (atomic_read(&rep->rep_if[REP_ETH].state) != REP_LOADED) + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -1276,7 +1276,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } @@ -1286,9 +1286,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_LOADED, REP_REGISTERED) == REP_LOADED) - rep->rep_if[rep_type].unload(rep); + esw->offloads.rep_ops[rep_type]->unload(rep); } static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) @@ -1349,11 +1349,11 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, { int err = 0; - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { - err = rep->rep_if[rep_type].load(esw->dev, rep); + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); if (err) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); } @@ -2216,21 +2216,17 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) } void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep_if *__rep_if, + const struct mlx5_eswitch_rep_ops *ops, u8 rep_type) { - struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep_data *rep_data; struct mlx5_eswitch_rep *rep; int i; + esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { - rep_if = &rep->rep_if[rep_type]; - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; - - atomic_set(&rep_if->state, REP_REGISTERED); + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); } } EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); @@ -2245,7 +2241,7 @@ void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) __unload_reps_all_vport(esw, max_vf, rep_type); mlx5_esw_for_all_reps(esw, i, rep) - atomic_set(&rep->rep_if[rep_type].state, REP_UNREGISTERED); + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); @@ -2254,7 +2250,7 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) struct mlx5_eswitch_rep *rep; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - return rep->rep_if[rep_type].priv; + return rep->rep_data[rep_type].priv; } void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, @@ -2265,9 +2261,9 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, rep = mlx5_eswitch_get_rep(esw, vport); - if (atomic_read(&rep->rep_if[rep_type].state) == REP_LOADED && - rep->rep_if[rep_type].get_proto_dev) - return rep->rep_if[rep_type].get_proto_dev(rep); + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + esw->offloads.rep_ops[rep_type]->get_proto_dev) + return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep); return NULL; } EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 0ca77dd1429c..d81ee4df181c 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -29,17 +29,19 @@ enum { }; struct mlx5_eswitch_rep; -struct mlx5_eswitch_rep_if { - int (*load)(struct mlx5_core_dev *dev, - struct mlx5_eswitch_rep *rep); - void (*unload)(struct mlx5_eswitch_rep *rep); - void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); - void *priv; - atomic_t state; +struct mlx5_eswitch_rep_ops { + int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep); + void (*unload)(struct mlx5_eswitch_rep *rep); + void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); +}; + +struct mlx5_eswitch_rep_data { + void *priv; + atomic_t state; }; struct mlx5_eswitch_rep { - struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES]; + struct mlx5_eswitch_rep_data rep_data[NUM_REP_TYPES]; u16 vport; u8 hw_id[ETH_ALEN]; u16 vlan; @@ -47,7 +49,7 @@ struct mlx5_eswitch_rep { }; void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep_if *rep_if, + const struct mlx5_eswitch_rep_ops *ops, u8 rep_type); void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type); void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, From 9b81d5a9945bc696c2e653757778e79495b164a1 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Thu, 4 Apr 2019 18:27:33 +0300 Subject: [PATCH 08/16] net/mlx5e: Generalize vport type in vport representor Beside the special vports (PF/uplink/ecpf), the rest of the vports are similar. Remove vf_ prefix from function and variable names. This patch does not change any functionality. Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 52 ++++++++----------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 4f8dabe6b166..dde0021bd5ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -128,7 +128,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev, } } -static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv) +static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -166,17 +166,6 @@ static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv) vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); } -static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_uplink_rep_update_hw_counters(priv); - else - mlx5e_vf_rep_update_hw_counters(priv); -} - static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -203,7 +192,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); mlx5e_rep_update_sw_counters(priv); - mlx5e_rep_update_hw_counters(priv); + priv->profile->update_stats(priv); mutex_unlock(&priv->state_lock); for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) @@ -363,7 +352,7 @@ static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); } -static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -1101,7 +1090,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); } -static int mlx5e_vf_rep_open(struct net_device *dev) +static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1124,7 +1113,7 @@ unlock: return err; } -static int mlx5e_vf_rep_close(struct net_device *dev) +static int mlx5e_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1276,7 +1265,7 @@ static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev } static void -mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -1285,7 +1274,7 @@ mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); } -static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu) +static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) { return mlx5e_change_mtu(netdev, new_mtu, NULL); } @@ -1318,16 +1307,16 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan return 0; } -static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { - .ndo_open = mlx5e_vf_rep_open, - .ndo_stop = mlx5e_vf_rep_close, +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_rep_open, + .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_stats64 = mlx5e_vf_rep_get_stats, + .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, - .ndo_change_mtu = mlx5e_vf_rep_change_mtu, + .ndo_change_mtu = mlx5e_rep_change_mtu, .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, }; @@ -1355,7 +1344,7 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { bool mlx5e_eswitch_rep(struct net_device *netdev) { - if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep || + if (netdev->netdev_ops == &mlx5e_netdev_ops_rep || netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep) return true; @@ -1418,9 +1407,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->dcbnl_ops = &mlx5e_dcbnl_ops; #endif } else { - netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep; + netdev->netdev_ops = &mlx5e_netdev_ops_rep; eth_hw_addr_random(netdev); - netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops; + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; } netdev->watchdog_timeo = 15 * HZ; @@ -1640,7 +1629,7 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) } } -static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv) +static void mlx5e_rep_enable(struct mlx5e_priv *priv) { mlx5e_set_netdev_mtu_boundaries(priv); } @@ -1712,15 +1701,15 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) mlx5_lag_remove(mdev); } -static const struct mlx5e_profile mlx5e_vf_rep_profile = { +static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, .cleanup = mlx5e_cleanup_rep, .init_rx = mlx5e_init_rep_rx, .cleanup_rx = mlx5e_cleanup_rep_rx, .init_tx = mlx5e_init_rep_tx, .cleanup_tx = mlx5e_cleanup_rep_tx, - .enable = mlx5e_vf_rep_enable, - .update_stats = mlx5e_vf_rep_update_hw_counters, + .enable = mlx5e_rep_enable, + .update_stats = mlx5e_rep_update_hw_counters, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = 1, @@ -1759,7 +1748,8 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rpriv->rep = rep; nch = mlx5e_get_max_num_channels(dev); - profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + profile = (rep->vport == MLX5_VPORT_UPLINK) ? + &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", From d1bda7eecd88e3d10e44541d675c07564f4f6938 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 6 May 2019 11:28:37 -0700 Subject: [PATCH 09/16] net/mlx5e: Allow matching only enc_key_id/enc_dst_port for decapsulation action In some case, we don't care the enc_src_ip and enc_dst_ip, and if we don't match the field enc_src_ip and enc_dst_ip, we can use fewer flows in hardware when revice the tunnel packets. For example, the tunnel packets may be sent from different hosts, we must offload one rule for each host. $ tc filter add dev vxlan0 protocol ip parent ffff: prio 1 \ flower dst_mac 00:11:22:33:44:00 \ enc_src_ip Host0_IP enc_dst_ip 2.2.2.100 \ enc_dst_port 4789 enc_key_id 100 \ action tunnel_key unset action mirred egress redirect dev eth0_1 $ tc filter add dev vxlan0 protocol ip parent ffff: prio 1 \ flower dst_mac 00:11:22:33:44:00 \ enc_src_ip Host1_IP enc_dst_ip 2.2.2.100 \ enc_dst_port 4789 enc_key_id 100 \ action tunnel_key unset action mirred egress redirect dev eth0_1 If we support flows which only match the enc_key_id and enc_dst_port, a flow can process the packets sent to VM which (mac 00:11:22:33:44:00). $ tc filter add dev vxlan0 protocol ip parent ffff: prio 1 \ flower dst_mac 00:11:22:33:44:00 \ enc_dst_port 4789 enc_key_id 100 \ action tunnel_key unset action mirred egress redirect dev eth0_1 Signed-off-by: Tonghao Zhang Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 27 +++++-------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 31cd02f11499..1c49b745b579 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1339,7 +1339,6 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - struct flow_match_control enc_control; int err; err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, @@ -1350,9 +1349,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - flow_rule_match_enc_control(rule, &enc_control); - - if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; flow_rule_match_enc_ipv4_addrs(rule, &match); @@ -1372,7 +1369,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { struct flow_match_ipv6_addrs match; flow_rule_match_enc_ipv6_addrs(rule, &match); @@ -1504,22 +1501,12 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) && - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_match_control match; - - flow_rule_match_enc_control(rule, &match); - switch (match.key->addr_type) { - case FLOW_DISSECTOR_KEY_IPV4_ADDRS: - case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) - return -EOPNOTSUPP; - break; - default: + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; - } /* In decap flow, header pointers should point to the inner * headers, outer header were already set by parse_tunnel_attr From d4a18e16c570fd84ef6cba9933803cf01a7b71e2 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 30 Jan 2019 15:52:35 +0200 Subject: [PATCH 10/16] net/mlx5e: Enable setting multiple match criteria for flow group When filling in flow spec match criteria, to allow previous modifications of the match criteria, use "|=" rather than "=". Tunnel options are parsed before the match criteria of the offloaded flow are being set. If the the flow that we're about to offload has encapsulation options, the flow group might need to match on additional criteria. For Geneve, an additional flow group matching parameter should be used - misc3. The appropriate bit in the match criteria is set while parsing the tunnel options, so the criteria value shouldn't be overwritten. This is a pre-step for supporting Geneve TLV options offload. Reviewed-by: Oz Shlomo Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1c49b745b579..8e2d8e735faa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -799,7 +799,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (attr->match_level != MLX5_MATCH_NONE) - parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, &flow_act, dest, dest_ix); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d987bd06935d..a8c6683c3349 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -173,7 +173,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_eswitch_owner_vhca_id); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { if (attr->tunnel_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -266,10 +266,10 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, source_eswitch_owner_vhca_id); if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; else - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); From 0ccc171ea6a2fa34a6b898329c0a447c84e27057 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 30 Jan 2019 17:21:55 +0200 Subject: [PATCH 11/16] net/mlx5: Geneve, Manage Geneve TLV options Use Geneve TLV Options object to manage the flex parser matching on the 32-bit options data. When the first flow with a certain class/type values is requested to be offloaded, create a FW object with FW command (Geneve TLV Options general object) and start counting the number of flows using this object. During this time, any request with a different class/type values will fail to be offloaded. Once the refcount reaches 0, destroy the TLV options general object, and can now offload a flow with any class/type parameters. Geneve TLV Options object is added to core device. It is currently used to manage Geneve TLV options general object allocation in FW and its reference counting only. In the future it will also be used for managing geneve ports by registering callbacks for ndo_udp_tunnel_add/del. Reviewed-by: Oz Shlomo Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 3 +- .../ethernet/mellanox/mlx5/core/lib/geneve.c | 157 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/lib/geneve.h | 33 ++++ .../net/ethernet/mellanox/mlx5/core/main.c | 4 + include/linux/mlx5/driver.h | 2 + 5 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 243368dc23db..e31027277a6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -31,7 +31,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ + lib/geneve.o # # Core extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c new file mode 100644 index 000000000000..23361a9ae4fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include "mlx5_core.h" +#include "geneve.h" + +struct mlx5_geneve { + struct mlx5_core_dev *mdev; + __be16 opt_class; + u8 opt_type; + u32 obj_id; + struct mutex sync_lock; /* protect GENEVE obj operations */ + u32 refcount; +}; + +static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev, + __be16 class, + u8 type, + u8 len) +{ + u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u64 general_obj_types; + void *hdr, *opt; + u16 obj_id; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT)) + return -EINVAL; + + hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr); + opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + + MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class)); + MLX5_SET(geneve_tlv_option, opt, option_type, type); + MLX5_SET(geneve_tlv_option, opt, option_data_length, len); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return obj_id; +} + +static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) +{ + int res = 0; + + if (IS_ERR_OR_NULL(geneve)) + return -EOPNOTSUPP; + + mutex_lock(&geneve->sync_lock); + + if (geneve->refcount) { + if (geneve->opt_class == opt->opt_class && + geneve->opt_type == opt->type) { + /* We already have TLV options obj allocated */ + geneve->refcount++; + } else { + /* TLV options obj allocated, but its params + * do not match the new request. + * We support only one such object. + */ + mlx5_core_warn(geneve->mdev, + "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n", + be16_to_cpu(opt->opt_class), + opt->type, + opt->length); + res = -EOPNOTSUPP; + goto unlock; + } + } else { + /* We don't have any TLV options obj allocated */ + + res = mlx5_geneve_tlv_option_create(geneve->mdev, + opt->opt_class, + opt->type, + opt->length); + if (res < 0) { + mlx5_core_warn(geneve->mdev, + "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n", + be16_to_cpu(opt->opt_class), + opt->type, opt->length, res); + goto unlock; + } + geneve->opt_class = opt->opt_class; + geneve->opt_type = opt->type; + geneve->obj_id = res; + geneve->refcount++; + } + +unlock: + mutex_unlock(&geneve->sync_lock); + return res; +} + +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + mutex_lock(&geneve->sync_lock); + if (--geneve->refcount == 0) { + /* We've just removed the last user of Geneve option. + * Now delete the object in FW. + */ + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + geneve->opt_class = 0; + geneve->opt_type = 0; + geneve->obj_id = 0; + } + mutex_unlock(&geneve->sync_lock); +} + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_geneve *geneve = + kzalloc(sizeof(*geneve), GFP_KERNEL); + + if (!geneve) + return ERR_PTR(-ENOMEM); + geneve->mdev = mdev; + mutex_init(&geneve->sync_lock); + + return geneve; +} + +void mlx5_geneve_destroy(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + /* Lockless since we are unloading */ + if (geneve->refcount) + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + kfree(geneve); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h new file mode 100644 index 000000000000..adee0cbba19c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_GENEVE_H__ +#define __MLX5_GENEVE_H__ + +#include +#include + +struct mlx5_geneve; + +#ifdef CONFIG_MLX5_ESWITCH + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev); +void mlx5_geneve_destroy(struct mlx5_geneve *geneve); + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt); +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline struct mlx5_geneve +*mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; } +static inline void +mlx5_geneve_destroy(struct mlx5_geneve *geneve) {} +static inline int +mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; } +static inline void +mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {} + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_GENEVE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 23d53163ce15..b27f9537256c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -63,6 +63,7 @@ #include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" +#include "lib/geneve.h" #include "lib/devcom.h" #include "diag/fw_tracer.h" #include "ecpf.h" @@ -821,6 +822,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_init_clock(dev); dev->vxlan = mlx5_vxlan_create(dev); + dev->geneve = mlx5_geneve_create(dev); err = mlx5_init_rl_table(dev); if (err) { @@ -865,6 +867,7 @@ err_mpfs_cleanup: err_rl_cleanup: mlx5_cleanup_rl_table(dev); err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); @@ -887,6 +890,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b5431f7d97cb..3a810bf043fe 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -647,6 +647,7 @@ struct mlx5_clock { struct mlx5_fw_tracer; struct mlx5_vxlan; +struct mlx5_geneve; struct mlx5_core_dev { struct device *device; @@ -681,6 +682,7 @@ struct mlx5_core_dev { u32 issi; struct mlx5e_resources mlx5e_res; struct mlx5_vxlan *vxlan; + struct mlx5_geneve *geneve; struct { struct mlx5_rsvd_gids reserved_gids; u32 roce_en; From 1f6da30697d0e102f5ed983a2a4942df9875195a Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 12 Feb 2019 13:31:00 +0200 Subject: [PATCH 12/16] net/mlx5e: Geneve, Keep tunnel info as pointer to the original struct In mlx5e encap entry structure, IP tunnel info data structure is copied by value. This approach worked till now, but it breaks when there are encapsulation options, such as in case of Geneve. These options are stored in the structure that is allocated adjacent to the IP tunnel info struct, and not pointed at by any field in that struct. Therefore, when copying the struct by value, we loose the address of the original struct and can't get to the encapsulation options. Fix the problem by storing the pointer to the tunnel info data instead. Reviewed-by: Oz Shlomo Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 15 ++++++++------- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 14 +++++++------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index fe5d4d7f15ed..2004d04c4c46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -141,7 +141,8 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return 0; } -static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) +static int mlx5e_gen_vxlan_header(char buf[], + const struct ip_tunnel_key *tun_key) { __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); struct udphdr *udp = (struct udphdr *)(buf); @@ -155,7 +156,7 @@ static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) return 0; } -static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key) +static int mlx5e_gen_gre_header(char buf[], const struct ip_tunnel_key *tun_key) { __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); int hdr_len; @@ -183,7 +184,7 @@ static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, struct mlx5e_encap_entry *e) { int err = 0; - struct ip_tunnel_key *key = &e->tun_info.key; + const struct ip_tunnel_key *key = &e->tun_info->key; if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { *ip_proto = IPPROTO_UDP; @@ -229,7 +230,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - struct ip_tunnel_key *tun_key = &e->tun_info.key; + const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; @@ -345,7 +346,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - struct ip_tunnel_key *tun_key = &e->tun_info.key; + const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; @@ -489,7 +490,7 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev); if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); + int dst_port = be16_to_cpu(e->tun_info->key.tp_dst); if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { NL_SET_ERR_MSG_MOD(extack, @@ -503,7 +504,7 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, e->tunnel_hlen = VXLAN_HLEN; } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; - e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags); + e->tunnel_hlen = gre_calc_hlen(e->tun_info->key.tun_flags); } else { e->reformat_type = -1; e->tunnel_hlen = -1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index e34573fd88c1..5472bb4a0b51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -150,7 +150,7 @@ struct mlx5e_encap_entry { struct hlist_node encap_hlist; struct list_head flows; u32 encap_id; - struct ip_tunnel_info tun_info; + const struct ip_tunnel_info *tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8e2d8e735faa..8b06c98cd436 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -126,7 +126,7 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { - struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; @@ -2568,7 +2568,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, } struct encap_key { - struct ip_tunnel_key *ip_tun_key; + const struct ip_tunnel_key *ip_tun_key; int tunnel_type; }; @@ -2612,7 +2612,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct ip_tunnel_info *tun_info; + const struct ip_tunnel_info *tun_info; struct encap_key key, e_key; struct mlx5e_encap_entry *e; unsigned short family; @@ -2621,7 +2621,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, int err = 0; parse_attr = attr->parse_attr; - tun_info = &parse_attr->tun_info[out_index]; + tun_info = parse_attr->tun_info[out_index]; family = ip_tunnel_info_af(tun_info); key.ip_tun_key = &tun_info->key; key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev); @@ -2630,7 +2630,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info.key; + e_key.ip_tun_key = &e->tun_info->key; e_key.tunnel_type = e->tunnel_type; if (!cmp_encap_info(&e_key, &key)) { found = true; @@ -2646,7 +2646,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, if (!e) return -ENOMEM; - e->tun_info = *tun_info; + e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); if (err) goto out_err; @@ -2885,7 +2885,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } else if (encap) { parse_attr->mirred_ifindex[attr->out_count] = out_dev->ifindex; - parse_attr->tun_info[attr->out_count] = *info; + parse_attr->tun_info[attr->out_count] = info; encap = false; attr->dests[attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; From d386939a327d2f1c9e918ee843087124eee0efda Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 14 Apr 2019 17:50:01 +0300 Subject: [PATCH 13/16] net/mlx5e: Rearrange tc tunnel code in a modular way Rearrange tc tunnel code so that it would be easy to add future tunnels: - Define tc tunnel object with the fields and callbacks that any tunnel must implement. - Define tc UDP tunnel object for UDP tunnels, such as VXLAN - Move each tunnel code (GRE, VXLAN) to its own separate file - Rewrite tc tunnel implementation in a general way - using only the objects and their callbacks. Reviewed-by: Oz Shlomo Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 313 ++++++------------ .../ethernet/mellanox/mlx5/core/en/tc_tun.h | 39 ++- .../mellanox/mlx5/core/en/tc_tun_gre.c | 95 ++++++ .../mellanox/mlx5/core/en/tc_tun_vxlan.c | 151 +++++++++ .../net/ethernet/mellanox/mlx5/core/en_rep.h | 3 +- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 10 +- 7 files changed, 389 insertions(+), 224 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index e31027277a6e..f1f222fc67e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -32,7 +32,7 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ - lib/geneve.o + lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o # # Core extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 2004d04c4c46..6f31b6876316 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -3,9 +3,19 @@ #include #include -#include "lib/vxlan.h" #include "en/tc_tun.h" +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return &vxlan_tunnel; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return &gre_tunnel; + else + return NULL; +} + static int get_route_and_out_devs(struct mlx5e_priv *priv, struct net_device *dev, struct net_device **route_dev, @@ -141,64 +151,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return 0; } -static int mlx5e_gen_vxlan_header(char buf[], - const struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - struct udphdr *udp = (struct udphdr *)(buf); - struct vxlanhdr *vxh = (struct vxlanhdr *) - ((char *)udp + sizeof(struct udphdr)); - - udp->dest = tun_key->tp_dst; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(tun_id); - - return 0; -} - -static int mlx5e_gen_gre_header(char buf[], const struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - int hdr_len; - struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); - - /* the HW does not calculate GRE csum or sequences */ - if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) - return -EOPNOTSUPP; - - greh->protocol = htons(ETH_P_TEB); - - /* GRE key */ - hdr_len = gre_calc_hlen(tun_key->tun_flags); - greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); - if (tun_key->tun_flags & TUNNEL_KEY) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - *ptr = tun_id; - } - - return 0; -} - static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, struct mlx5e_encap_entry *e) { - int err = 0; - const struct ip_tunnel_key *key = &e->tun_info->key; - - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *ip_proto = IPPROTO_UDP; - err = mlx5e_gen_vxlan_header(buf, key); - } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - *ip_proto = IPPROTO_GRE; - err = mlx5e_gen_gre_header(buf, key); - } else { - pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" - , e->tunnel_type); - err = -EOPNOTSUPP; + if (!e->tunnel) { + pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n"); + return -EOPNOTSUPP; } - return err; + return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e); } static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, @@ -254,7 +215,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, ipv4_encap_size = (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct iphdr) + - e->tunnel_hlen; + e->tunnel->calc_hlen(e); if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -370,7 +331,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, ipv6_encap_size = (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct ipv6hdr) + - e->tunnel_hlen; + e->tunnel->calc_hlen(e); if (max_encap_size < ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -456,27 +417,12 @@ out: return err; } -int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev) -{ - if (netif_is_vxlan(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_VXLAN; - else if (netif_is_gretap(tunnel_dev) || - netif_is_ip6gretap(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_GRETAP; - else - return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; -} - bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev) { - int tunnel_type = mlx5e_tc_tun_get_type(netdev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) - return true; - else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP && - MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) + if (tunnel && tunnel->can_offload(priv)) return true; else return false; @@ -487,137 +433,14 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, struct mlx5e_encap_entry *e, struct netlink_ext_ack *extack) { - e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev); - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - int dst_port = be16_to_cpu(e->tun_info->key.tp_dst); - - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { - NL_SET_ERR_MSG_MOD(extack, - "vxlan udp dport was not registered with the HW"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", - dst_port); - return -EOPNOTSUPP; - } - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; - e->tunnel_hlen = VXLAN_HLEN; - } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; - e->tunnel_hlen = gre_calc_hlen(e->tun_info->key.tun_flags); - } else { + if (!tunnel) { e->reformat_type = -1; - e->tunnel_hlen = -1; - return -EOPNOTSUPP; - } - return 0; -} - -static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *headers_c, - void *headers_v) -{ - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - struct netlink_ext_ack *extack = f->common.extack; - void *misc_c = MLX5_ADDR_OF(fte_match_param, - spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, - spec->match_value, - misc_parameters); - struct flow_match_ports enc_ports; - - flow_rule_match_enc_ports(rule, &enc_ports); - - /* Full udp dst port must be given */ - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || - memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) { - NL_SET_ERR_MSG_MOD(extack, - "VXLAN decap filter must include enc_dst_port condition"); - netdev_warn(priv->netdev, - "VXLAN decap filter must include enc_dst_port condition\n"); return -EOPNOTSUPP; } - /* udp dst port must be knonwn as a VXLAN port */ - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) { - NL_SET_ERR_MSG_MOD(extack, - "Matched UDP port is not registered as a VXLAN port"); - netdev_warn(priv->netdev, - "UDP port %d is not registered as a VXLAN port\n", - be16_to_cpu(enc_ports.key->dst)); - return -EOPNOTSUPP; - } - - /* dst UDP port is valid here */ - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, - ntohs(enc_ports.mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, - ntohs(enc_ports.key->dst)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, - ntohs(enc_ports.mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, - ntohs(enc_ports.key->src)); - - /* match on VNI */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid enc_keyid; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - - MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(enc_keyid.mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(enc_keyid.key->keyid)); - } - return 0; -} - -static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *outer_headers_c, - void *outer_headers_v) -{ - void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - - if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { - NL_SET_ERR_MSG_MOD(f->common.extack, - "GRE HW offloading is not supported"); - netdev_warn(priv->netdev, "GRE HW offloading is not supported\n"); - return -EOPNOTSUPP; - } - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, - ip_protocol, IPPROTO_GRE); - - /* gre protocol*/ - MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); - MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); - - /* gre key */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid enc_keyid; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - MLX5_SET(fte_match_set_misc, misc_c, - gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, - gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); - } - - return 0; + return tunnel->init_encap_attr(tunnel_dev, priv, e, extack); } int mlx5e_tc_tun_parse(struct net_device *filter_dev, @@ -627,25 +450,87 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, void *headers_c, void *headers_v, u8 *match_level) { - int tunnel_type; + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); int err = 0; - tunnel_type = mlx5e_tc_tun_get_type(filter_dev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *match_level = MLX5_MATCH_L4; - err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, - headers_c, headers_v); - } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - *match_level = MLX5_MATCH_L3; - err = mlx5e_tc_tun_parse_gretap(priv, spec, f, - headers_c, headers_v); - } else { + if (!tunnel) { netdev_warn(priv->netdev, - "decapsulation offload is not supported for %s (kind: \"%s\")\n", - netdev_name(filter_dev), + "decapsulation offload is not supported for %s net device\n", mlx5e_netdev_kind(filter_dev)); - - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; } + + *match_level = tunnel->match_level; + + if (tunnel->parse_udp_ports) { + err = tunnel->parse_udp_ports(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + + if (tunnel->parse_tunnel) { + err = tunnel->parse_tunnel(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + +out: return err; } + +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + /* Full udp dst port must be given */ + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + NL_SET_ERR_MSG_MOD(extack, + "UDP tunnel decap filter must include enc_dst_port condition"); + netdev_warn(priv->netdev, + "UDP tunnel decap filter must include enc_dst_port condition\n"); + return -EOPNOTSUPP; + } + + flow_rule_match_enc_ports(rule, &enc_ports); + + if (memchr_inv(&enc_ports.mask->dst, 0xff, + sizeof(enc_ports.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "UDP tunnel decap filter must match enc_dst_port fully"); + netdev_warn(priv->netdev, + "UDP tunnel decap filter must match enc_dst_port fully\n"); + return -EOPNOTSUPP; + } + + /* match on UDP protocol and dst port number */ + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, + ntohs(enc_ports.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + ntohs(enc_ports.key->dst)); + + /* UDP src port on outer header is generated by HW, + * so it is probably a bad idea to request matching it. + * Nonetheless, it is allowed. + */ + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, + ntohs(enc_ports.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, + ntohs(enc_ports.key->src)); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index b63f15de899d..ca1d3370b429 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -14,9 +14,39 @@ enum { MLX5E_TC_TUNNEL_TYPE_UNKNOWN, MLX5E_TC_TUNNEL_TYPE_VXLAN, - MLX5E_TC_TUNNEL_TYPE_GRETAP + MLX5E_TC_TUNNEL_TYPE_GRETAP, }; +struct mlx5e_tc_tunnel { + int tunnel_type; + enum mlx5_flow_match_level match_level; + + bool (*can_offload)(struct mlx5e_priv *priv); + int (*calc_hlen)(struct mlx5e_encap_entry *e); + int (*init_encap_attr)(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + int (*generate_ip_tun_hdr)(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e); + int (*parse_udp_ports)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v); + int (*parse_tunnel)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v); +}; + +extern struct mlx5e_tc_tunnel vxlan_tunnel; +extern struct mlx5e_tc_tunnel gre_tunnel; + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev); + int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, struct mlx5e_priv *priv, struct mlx5e_encap_entry *e, @@ -30,7 +60,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); -int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev); bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev); @@ -41,4 +70,10 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, void *headers_c, void *headers_v, u8 *match_level); +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v); + #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c new file mode 100644 index 000000000000..06908441d932 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e) +{ + return gre_calc_hlen(e->tun_info->key.tun_flags); +} + +static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &gre_tunnel; + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + + *ip_proto = IPPROTO_GRE; + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + *ptr = tun_id; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE); + + /* gre protocol */ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); + } + + return 0; +} + +struct mlx5e_tc_tunnel gre_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP, + .match_level = MLX5_MATCH_L3, + .can_offload = mlx5e_tc_tun_can_offload_gretap, + .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap, + .parse_udp_ports = NULL, + .parse_tunnel = mlx5e_tc_tun_parse_gretap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c new file mode 100644 index 000000000000..2857b38527d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e) +{ + return VXLAN_HLEN; +} + +static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* check the UDP destination port validity */ + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, + be16_to_cpu(enc_ports.key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + int err = 0; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int dst_port = be16_to_cpu(e->tun_info->key.tp_dst); + + e->tunnel = &vxlan_tunnel; + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh; + + vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + /* match on VNI is required */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_vxlan_vni)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on VXLAN VNI is not supported"); + netdev_warn(priv->netdev, + "Matching on VXLAN VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +struct mlx5e_tc_tunnel vxlan_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_vxlan, + .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_vxlan, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, + .parse_tunnel = mlx5e_tc_tun_parse_vxlan, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 5472bb4a0b51..d4585f3b8cb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -155,8 +155,7 @@ struct mlx5e_encap_entry { struct net_device *out_dev; struct net_device *route_dev; - int tunnel_type; - int tunnel_hlen; + struct mlx5e_tc_tunnel *tunnel; int reformat_type; u8 flags; char *encap_header; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8b06c98cd436..915f0abc21e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2569,20 +2569,20 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct encap_key { const struct ip_tunnel_key *ip_tun_key; - int tunnel_type; + struct mlx5e_tc_tunnel *tc_tunnel; }; static inline int cmp_encap_info(struct encap_key *a, struct encap_key *b) { return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tunnel_type != b->tunnel_type; + a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; } static inline int hash_encap_info(struct encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), - key->tunnel_type); + key->tc_tunnel->tunnel_type); } @@ -2624,14 +2624,14 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, tun_info = parse_attr->tun_info[out_index]; family = ip_tunnel_info_af(tun_info); key.ip_tun_key = &tun_info->key; - key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev); + key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); hash_key = hash_encap_info(&key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { e_key.ip_tun_key = &e->tun_info->key; - e_key.tunnel_type = e->tunnel_type; + e_key.tc_tunnel = e->tunnel; if (!cmp_encap_info(&e_key, &key)) { found = true; break; From 9272e3df3023aa53256cc53a9e4e62ee715575a7 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 4 Apr 2019 03:37:36 +0300 Subject: [PATCH 14/16] net/mlx5e: Geneve, Add support for encap/decap flows offload Add HW offloading support for flows with Geneve encap/decap. Notes about decap flows with Geneve TLV Options: - Support offloading of 32-bit options data only - At any given time, only one combination of class/type parameters can be offloaded, but the same class/type combination can have many different flows offloaded with different 32-bit option data - Options with value of 0 can't be offloaded Reviewed-by: Oz Shlomo Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 3 +- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 3 + .../ethernet/mellanox/mlx5/core/en/tc_tun.h | 2 + .../mellanox/mlx5/core/en/tc_tun_geneve.c | 335 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_tc.c | 23 +- 5 files changed, 363 insertions(+), 3 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index f1f222fc67e2..0a34223e8867 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -32,7 +32,8 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ - lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o + lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ + en/tc_tun_geneve.o # # Core extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 6f31b6876316..b099968b2b7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -3,12 +3,15 @@ #include #include +#include #include "en/tc_tun.h" struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) { if (netif_is_vxlan(tunnel_dev)) return &vxlan_tunnel; + else if (netif_is_geneve(tunnel_dev)) + return &geneve_tunnel; else if (netif_is_gretap(tunnel_dev) || netif_is_ip6gretap(tunnel_dev)) return &gre_tunnel; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index ca1d3370b429..3c48f7e62505 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -14,6 +14,7 @@ enum { MLX5E_TC_TUNNEL_TYPE_UNKNOWN, MLX5E_TC_TUNNEL_TYPE_VXLAN, + MLX5E_TC_TUNNEL_TYPE_GENEVE, MLX5E_TC_TUNNEL_TYPE_GRETAP, }; @@ -43,6 +44,7 @@ struct mlx5e_tc_tunnel { }; extern struct mlx5e_tc_tunnel vxlan_tunnel; +extern struct mlx5e_tc_tunnel geneve_tunnel; extern struct mlx5e_tc_tunnel gre_tunnel; struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c new file mode 100644 index 000000000000..238ae85d07cc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include +#include "lib/geneve.h" +#include "en/tc_tun.h" + +#define MLX5E_GENEVE_VER 0 + +static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv) +{ + return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE); +} + +static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e) +{ + return sizeof(struct udphdr) + + sizeof(struct genevehdr) + + e->tun_info->options_len; +} + +static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* Currently we support only default GENEVE + * port, so udp dst port must match. + */ + if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a GENEVE port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a GENEVE port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_geneve(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &geneve_tunnel; + + /* Reformat type for GENEVE encap is similar to VXLAN: + * in both cases the HW adds in the same place a + * defined encapsulation header that the SW provides. + */ + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni) +{ +#ifdef __BIG_ENDIAN + vni[0] = (__force __u8)(tun_id >> 16); + vni[1] = (__force __u8)(tun_id >> 8); + vni[2] = (__force __u8)tun_id; +#else + vni[0] = (__force __u8)((__force u64)tun_id >> 40); + vni[1] = (__force __u8)((__force u64)tun_id >> 48); + vni[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} + +static int mlx5e_gen_ip_tunnel_header_geneve(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_info *tun_info = e->tun_info; + struct udphdr *udp = (struct udphdr *)(buf); + struct genevehdr *geneveh; + + geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr)); + + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_info->key.tp_dst; + + memset(geneveh, 0, sizeof(*geneveh)); + geneveh->ver = MLX5E_GENEVE_VER; + geneveh->opt_len = tun_info->options_len / 4; + geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM); + geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT); + mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni); + geneveh->proto_type = htons(ETH_P_TEB); + + if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (!geneveh->opt_len) + return -EOPNOTSUPP; + ip_tunnel_info_opts_get(geneveh->options, tun_info); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len); + u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + void *misc_c, *misc_v, *misc_3_c, *misc_3_v; + struct geneve_opt *option_key, *option_mask; + __be32 opt_data_key = 0, opt_data_mask = 0; + struct flow_match_enc_opts enc_opts; + int res = 0; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3); + misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) + return 0; + + flow_rule_match_enc_opts(rule, &enc_opts); + + if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_data)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options is not supported\n"); + return -EOPNOTSUPP; + } + + /* make sure that we're talking about GENEVE options */ + + if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: option type is not GENEVE"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: option type is not GENEVE\n"); + return -EOPNOTSUPP; + } + + if (enc_opts.mask->len && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_opt_len)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options len is not supported\n"); + return -EOPNOTSUPP; + } + + /* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it + * doesn't include the TLV option header. 'geneve_opt_len' is a total + * len of all the options, including the headers, also multiples of 4 + * bytes. Len that comes from the dissector is in bytes. + */ + + if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported options len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported options len (len=%d)\n", + enc_opts.key->len); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4); + MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4); + + /* we support matching on one option only, so just get it */ + option_key = (struct geneve_opt *)&enc_opts.key->data[0]; + option_mask = (struct geneve_opt *)&enc_opts.mask->data[0]; + + if (option_key->length > max_tlv_option_data_len) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported option len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n", + option_key->length, option_mask->length); + return -EOPNOTSUPP; + } + + /* data can't be all 0 - fail to offload such rule */ + if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: can't match on 0 data field"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: can't match on 0 data field\n"); + return -EOPNOTSUPP; + } + + /* add new GENEVE TLV options object */ + res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key); + if (res) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: failed creating TLV opt object"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n", + be16_to_cpu(option_key->opt_class), + option_key->type, option_key->length); + return res; + } + + /* In general, after creating the object, need to query it + * in order to check which option data to set in misc3. + * But we support only geneve_tlv_option_0_data, so no + * point querying at this stage. + */ + + memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4); + memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4); + MLX5_SET(fte_match_set_misc3, misc_3_v, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); + MLX5_SET(fte_match_set_misc3, misc_3_c, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct netlink_ext_ack *extack = f->common.extack; + + /* match on OAM - packets with OAM bit on should NOT be offloaded */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n"); + return -EOPNOTSUPP; + } + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam); + MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0); + + /* Match on GENEVE protocol. We support only Transparent Eth Bridge. */ + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_protocol_type)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type); + MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f); + if (err) + return err; + + err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f); + if (err) + return err; + + return mlx5e_tc_tun_parse_geneve_options(priv, spec, f); +} + +struct mlx5e_tc_tunnel geneve_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_geneve, + .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve, + .parse_tunnel = mlx5e_tc_tun_parse_geneve, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 915f0abc21e2..151e55c8c1ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -53,6 +53,7 @@ #include "en/port.h" #include "en/tc_tun.h" #include "lib/devcom.h" +#include "lib/geneve.h" struct mlx5_nic_flow_attr { u32 action; @@ -1063,6 +1064,19 @@ err_max_prio_chain: return err; } +static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec; + void *headers_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters_3); + u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3, + headers_v, + geneve_tlv_option_0_data); + + return !!geneve_tlv_opt_0_data; +} + static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -1084,6 +1098,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } + if (mlx5_flow_has_geneve_opt(flow)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); + mlx5_eswitch_del_vlan_action(esw, attr); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) @@ -1494,7 +1511,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_IP) | - BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", dissector->used_keys); @@ -1504,7 +1522,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; From 10caabdaad5ace85577a453da97d1f8d3b944427 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Thu, 18 Apr 2019 16:45:29 +0300 Subject: [PATCH 15/16] net/mlx5e: Use termination table for VLAN push actions HW does not support push VLAN action in the RX direction (packets arriving from the wire). The FW works around this limitation by haripining the packet. The hairpin workaround applies only when the push VLAN action is specified in a termination table, assuring that there are no actions following the haripin. Instantiate termination table for push VLAN actions. Re-use identical terminating tables for increased HW cache efficiency. Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 3 +- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 22 ++ .../mellanox/mlx5/core/eswitch_offloads.c | 15 +- .../mlx5/core/eswitch_offloads_termtbl.c | 277 ++++++++++++++++++ 4 files changed, 315 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 0a34223e8867..d9d363fe5cf7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -38,7 +38,8 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tu # # Core extra # -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o rdma.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ + ecpf.o rdma.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 38022fed1550..849a628f6d17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -173,6 +173,8 @@ struct mlx5_esw_offload { struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); + DECLARE_HASHTABLE(termtbl_tbl, 8); + struct mutex termtbl_mutex; /* protects termtbl hash */ const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; @@ -269,6 +271,25 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); struct mlx5_flow_spec; struct mlx5_esw_flow_attr; +struct mlx5_termtbl_handle; + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec); + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest); + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt); struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, @@ -339,6 +360,7 @@ struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *rep; struct mlx5_core_dev *mdev; u32 encap_id; + struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; u8 match_level; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a8c6683c3349..060de01f09b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -192,7 +192,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, goto err_esw_get; } - rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec)) + rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, + &flow_act, dest, i); + else + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) goto err_add_rule; else @@ -294,8 +298,16 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, bool fwd_rule) { bool split = (attr->split_count > 0); + int i; mlx5_del_flow_rules(rule); + + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + } + esw->offloads.num_flows--; if (fwd_rule) { @@ -1870,6 +1882,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, goto err_reps; esw_offloads_devcom_init(esw); + mutex_init(&esw->offloads.termtbl_mutex); esw_functions_changed_event_init(esw, vf_nvports); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c new file mode 100644 index 000000000000..cb7d8ebe2c95 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include +#include "eswitch.h" + +struct mlx5_termtbl_handle { + struct hlist_node termtbl_hlist; + + struct mlx5_flow_table *termtbl; + struct mlx5_flow_act flow_act; + struct mlx5_flow_destination dest; + + struct mlx5_flow_handle *rule; + int ref_count; +}; + +static u32 +mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + u32 hash; + + hash = jhash_1word(flow_act->action, 0); + hash = jhash((const void *)&flow_act->vlan, + sizeof(flow_act->vlan), hash); + hash = jhash((const void *)&dest->vport.num, + sizeof(dest->vport.num), hash); + hash = jhash((const void *)&dest->vport.vhca_id, + sizeof(dest->vport.num), hash); + return hash; +} + +static int +mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, + struct mlx5_flow_destination *dest1, + struct mlx5_flow_act *flow_act2, + struct mlx5_flow_destination *dest2) +{ + return flow_act1->action != flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); +} + +static int +mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, + struct mlx5_termtbl_handle *tt, + struct mlx5_flow_act *flow_act) +{ + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_spec spec = {}; + int prio, flags; + int err; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* As this is the terminating action then the termination table is the + * same prio as the slow path + */ + prio = FDB_SLOW_PATH; + flags = MLX5_FLOW_TABLE_TERMINATION; + tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1, + 0, flags); + if (IS_ERR(tt->termtbl)) { + esw_warn(dev, "Failed to create termination table\n"); + return -EOPNOTSUPP; + } + + tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act, + &tt->dest, 1); + + if (IS_ERR(tt->rule)) { + esw_warn(dev, "Failed to create termination table rule\n"); + goto add_flow_err; + } + return 0; + +add_flow_err: + err = mlx5_destroy_flow_table(tt->termtbl); + if (err) + esw_warn(dev, "Failed to destroy termination table\n"); + + return -EOPNOTSUPP; +} + +static struct mlx5_termtbl_handle * +mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + struct mlx5_termtbl_handle *tt; + bool found = false; + u32 hash_key; + int err; + + mutex_lock(&esw->offloads.termtbl_mutex); + + hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); + hash_for_each_possible(esw->offloads.termtbl_tbl, tt, + termtbl_hlist, hash_key) { + if (!mlx5_eswitch_termtbl_cmp(&tt->flow_act, &tt->dest, + flow_act, dest)) { + found = true; + break; + } + } + if (found) + goto tt_add_ref; + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) { + err = -ENOMEM; + goto tt_create_err; + } + + tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + tt->dest.vport.num = dest->vport.num; + tt->dest.vport.vhca_id = dest->vport.vhca_id; + memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); + + err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); + if (err) { + esw_warn(esw->dev, "Failed to create termination table\n"); + goto tt_create_err; + } + hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key); +tt_add_ref: + tt->ref_count++; + mutex_unlock(&esw->offloads.termtbl_mutex); + return tt; +tt_create_err: + kfree(tt); + mutex_unlock(&esw->offloads.termtbl_mutex); + return ERR_PTR(err); +} + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt) +{ + mutex_lock(&esw->offloads.termtbl_mutex); + if (--tt->ref_count == 0) + hash_del(&tt->termtbl_hlist); + mutex_unlock(&esw->offloads.termtbl_mutex); + + if (!tt->ref_count) { + mlx5_del_flow_rules(tt->rule); + mlx5_destroy_flow_table(tt->termtbl); + kfree(tt); + } +} + +static void +mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, + struct mlx5_flow_act *dst) +{ + if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)) + return; + + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); + memset(&src->vlan[0], 0, sizeof(src->vlan[0])); + + if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) + return; + + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); +} + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec) +{ + u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria, + misc_parameters.source_port); + u32 port_value = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.source_port); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) + return false; + + /* push vlan on RX */ + return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && + ((port_mask & port_value) == MLX5_VPORT_UPLINK); +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest) +{ + struct mlx5_flow_act term_tbl_act = {}; + struct mlx5_flow_handle *rule = NULL; + bool term_table_created = false; + int num_vport_dests = 0; + int i, curr_dest; + + mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act); + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + for (i = 0; i < num_dest; i++) { + struct mlx5_termtbl_handle *tt; + + /* only vport destinations can be terminated */ + if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + continue; + + /* get the terminating table for the action list */ + tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, + &dest[i]); + if (IS_ERR(tt)) { + esw_warn(esw->dev, "Failed to create termination table\n"); + goto revert_changes; + } + attr->dests[num_vport_dests].termtbl = tt; + num_vport_dests++; + + /* link the destination with the termination table */ + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = tt->termtbl; + term_table_created = true; + } + + /* at least one destination should reference a termination table */ + if (!term_table_created) + goto revert_changes; + + /* create the FTE */ + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); + if (IS_ERR(rule)) + goto revert_changes; + + goto out; + +revert_changes: + /* revert the changes that were made to the original flow_act + * and fall-back to the original rule actions + */ + mlx5_eswitch_termtbl_actions_move(&term_tbl_act, flow_act); + + for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { + struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + + /* search for the destination associated with the + * current term table + */ + for (i = 0; i < num_dest; i++) { + if (dest[i].ft != tt->termtbl) + continue; + + memset(&dest[i], 0, sizeof(dest[i])); + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = tt->dest.vport.num; + dest[i].vport.vhca_id = tt->dest.vport.vhca_id; + mlx5_eswitch_termtbl_put(esw, tt); + break; + } + } + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); +out: + return rule; +} From ca6c7df00a89206f142365091c689059fc0b67bf Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Tue, 14 May 2019 13:55:22 +0300 Subject: [PATCH 16/16] net/mlx5e: TX, Improve performance under GSO workload __netdev_tx_sent_queue() was introduced by: commit 3e59020abf0f ("net: bql: add __netdev_tx_sent_queue()") BQL counters should be updated without flipping/caring about BQL status, if the current skb has xmit_more set. Using __netdev_tx_sent_queue() avoids messing with BQL stop flag, increases performance on GSO workload by keeping doorbells to the minimum required and also sparing atomic operations. Signed-off-by: Erez Alfasi Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 195a7d903cec..6fd6d5356246 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -301,6 +301,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; + bool send_doorbell; wi->num_bytes = num_bytes; wi->num_dma = num_dma; @@ -310,8 +311,6 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - netdev_tx_sent_queue(sq->txq, num_bytes); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; @@ -321,7 +320,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->stopped++; } - if (!xmit_more || netif_xmit_stopped(sq->txq)) + send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes, + xmit_more); + if (send_doorbell) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); }