Merge branch 'bnxt_en-Error-recovery-fixes'

Michael Chan says:

====================
bnxt_en: Error recovery fixes.

This patch series contains fixes mostly for the error recovery feature
and related areas.  Please queue the series for -stable also.  Thanks.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-12-10 17:37:14 -08:00
Родитель 868afbaca1 7e334fc800
Коммит ac397934b3
6 изменённых файлов: 146 добавлений и 60 удалений

Просмотреть файл

@ -2001,6 +2001,9 @@ static int bnxt_async_event_process(struct bnxt *bp,
case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: {
u32 data1 = le32_to_cpu(cmpl->event_data1);
if (!bp->fw_health)
goto async_event_process_exit;
bp->fw_reset_timestamp = jiffies;
bp->fw_reset_min_dsecs = cmpl->timestamp_lo;
if (!bp->fw_reset_min_dsecs)
@ -4421,8 +4424,9 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
req.os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE |
FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE;
if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET)
flags |= FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
flags |= FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT |
FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT;
@ -7115,14 +7119,6 @@ static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
goto err_recovery_out;
if (!fw_health) {
fw_health = kzalloc(sizeof(*fw_health), GFP_KERNEL);
bp->fw_health = fw_health;
if (!fw_health) {
rc = -ENOMEM;
goto err_recovery_out;
}
}
fw_health->flags = le32_to_cpu(resp->flags);
if ((fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) &&
!(bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL)) {
@ -8796,6 +8792,9 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
if (fw_reset) {
if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
bnxt_ulp_stop(bp);
bnxt_free_ctx_mem(bp);
kfree(bp->ctx);
bp->ctx = NULL;
rc = bnxt_fw_init_one(bp);
if (rc) {
set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
@ -9990,8 +9989,7 @@ static void bnxt_fw_health_check(struct bnxt *bp)
struct bnxt_fw_health *fw_health = bp->fw_health;
u32 val;
if (!fw_health || !fw_health->enabled ||
test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
if (!fw_health->enabled || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
return;
if (fw_health->tmr_counter) {
@ -10482,6 +10480,23 @@ static void bnxt_init_dflt_coal(struct bnxt *bp)
bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
}
static void bnxt_alloc_fw_health(struct bnxt *bp)
{
if (bp->fw_health)
return;
if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) &&
!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
return;
bp->fw_health = kzalloc(sizeof(*bp->fw_health), GFP_KERNEL);
if (!bp->fw_health) {
netdev_warn(bp->dev, "Failed to allocate fw_health\n");
bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
}
}
static int bnxt_fw_init_one_p1(struct bnxt *bp)
{
int rc;
@ -10528,6 +10543,7 @@ static int bnxt_fw_init_one_p2(struct bnxt *bp)
netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
rc);
bnxt_alloc_fw_health(bp);
rc = bnxt_hwrm_error_recovery_qcfg(bp);
if (rc)
netdev_warn(bp->dev, "hwrm query error recovery failure rc: %d\n",
@ -10609,6 +10625,12 @@ static int bnxt_fw_init_one(struct bnxt *bp)
rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
if (rc)
return rc;
/* In case fw capabilities have changed, destroy the unneeded
* reporters and create newly capable ones.
*/
bnxt_dl_fw_reporters_destroy(bp, false);
bnxt_dl_fw_reporters_create(bp);
bnxt_fw_init_one_p3(bp);
return 0;
}
@ -10751,8 +10773,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
return;
case BNXT_FW_RESET_STATE_ENABLE_DEV:
if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) &&
bp->fw_health) {
if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
u32 val;
val = bnxt_fw_health_readl(bp,
@ -11396,11 +11417,11 @@ static void bnxt_remove_one(struct pci_dev *pdev)
struct net_device *dev = pci_get_drvdata(pdev);
struct bnxt *bp = netdev_priv(dev);
if (BNXT_PF(bp)) {
if (BNXT_PF(bp))
bnxt_sriov_disable(bp);
bnxt_dl_unregister(bp);
}
bnxt_dl_fw_reporters_destroy(bp, true);
bnxt_dl_unregister(bp);
pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
bnxt_shutdown_tc(bp);
@ -11415,6 +11436,8 @@ static void bnxt_remove_one(struct pci_dev *pdev)
bnxt_dcb_free(bp);
kfree(bp->edev);
bp->edev = NULL;
kfree(bp->fw_health);
bp->fw_health = NULL;
bnxt_cleanup_pci(bp);
bnxt_free_ctx_mem(bp);
kfree(bp->ctx);
@ -11875,8 +11898,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto init_err_cleanup_tc;
if (BNXT_PF(bp))
bnxt_dl_register(bp);
bnxt_dl_register(bp);
bnxt_dl_fw_reporters_create(bp);
netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
board_info[ent->driver_data].name,

Просмотреть файл

@ -39,11 +39,10 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
struct netlink_ext_ack *extack)
{
struct bnxt *bp = devlink_health_reporter_priv(reporter);
struct bnxt_fw_health *health = bp->fw_health;
u32 val, health_status;
int rc;
if (!health || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
return 0;
val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
@ -126,21 +125,15 @@ struct devlink_health_reporter_ops bnxt_dl_fw_fatal_reporter_ops = {
.recover = bnxt_fw_fatal_recover,
};
static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
void bnxt_dl_fw_reporters_create(struct bnxt *bp)
{
struct bnxt_fw_health *health = bp->fw_health;
if (!health)
if (!bp->dl || !health)
return;
health->fw_reporter =
devlink_health_reporter_create(bp->dl, &bnxt_dl_fw_reporter_ops,
0, false, bp);
if (IS_ERR(health->fw_reporter)) {
netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
PTR_ERR(health->fw_reporter));
health->fw_reporter = NULL;
}
if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) || health->fw_reset_reporter)
goto err_recovery;
health->fw_reset_reporter =
devlink_health_reporter_create(bp->dl,
@ -150,8 +143,30 @@ static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
PTR_ERR(health->fw_reset_reporter));
health->fw_reset_reporter = NULL;
bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
}
err_recovery:
if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
return;
if (!health->fw_reporter) {
health->fw_reporter =
devlink_health_reporter_create(bp->dl,
&bnxt_dl_fw_reporter_ops,
0, false, bp);
if (IS_ERR(health->fw_reporter)) {
netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
PTR_ERR(health->fw_reporter));
health->fw_reporter = NULL;
bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
return;
}
}
if (health->fw_fatal_reporter)
return;
health->fw_fatal_reporter =
devlink_health_reporter_create(bp->dl,
&bnxt_dl_fw_fatal_reporter_ops,
@ -160,24 +175,35 @@ static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
PTR_ERR(health->fw_fatal_reporter));
health->fw_fatal_reporter = NULL;
bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
}
}
static void bnxt_dl_fw_reporters_destroy(struct bnxt *bp)
void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all)
{
struct bnxt_fw_health *health = bp->fw_health;
if (!health)
if (!bp->dl || !health)
return;
if (health->fw_reporter)
devlink_health_reporter_destroy(health->fw_reporter);
if (health->fw_reset_reporter)
if ((all || !(bp->fw_cap & BNXT_FW_CAP_HOT_RESET)) &&
health->fw_reset_reporter) {
devlink_health_reporter_destroy(health->fw_reset_reporter);
health->fw_reset_reporter = NULL;
}
if (health->fw_fatal_reporter)
if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && !all)
return;
if (health->fw_reporter) {
devlink_health_reporter_destroy(health->fw_reporter);
health->fw_reporter = NULL;
}
if (health->fw_fatal_reporter) {
devlink_health_reporter_destroy(health->fw_fatal_reporter);
health->fw_fatal_reporter = NULL;
}
}
void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
@ -185,9 +211,6 @@ void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
struct bnxt_fw_health *fw_health = bp->fw_health;
struct bnxt_fw_reporter_ctx fw_reporter_ctx;
if (!fw_health)
return;
fw_reporter_ctx.sp_event = event;
switch (event) {
case BNXT_FW_RESET_NOTIFY_SP_EVENT:
@ -247,6 +270,8 @@ static const struct devlink_ops bnxt_dl_ops = {
.flash_update = bnxt_dl_flash_update,
};
static const struct devlink_ops bnxt_vf_dl_ops;
enum bnxt_dl_param_id {
BNXT_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK,
@ -460,7 +485,10 @@ int bnxt_dl_register(struct bnxt *bp)
return -ENOTSUPP;
}
dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
if (BNXT_PF(bp))
dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
else
dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
if (!dl) {
netdev_warn(bp->dev, "devlink_alloc failed");
return -ENOMEM;
@ -479,6 +507,9 @@ int bnxt_dl_register(struct bnxt *bp)
goto err_dl_free;
}
if (!BNXT_PF(bp))
return 0;
rc = devlink_params_register(dl, bnxt_dl_params,
ARRAY_SIZE(bnxt_dl_params));
if (rc) {
@ -506,8 +537,6 @@ int bnxt_dl_register(struct bnxt *bp)
devlink_params_publish(dl);
bnxt_dl_fw_reporters_create(bp);
return 0;
err_dl_port_unreg:
@ -530,12 +559,14 @@ void bnxt_dl_unregister(struct bnxt *bp)
if (!dl)
return;
bnxt_dl_fw_reporters_destroy(bp);
devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params,
ARRAY_SIZE(bnxt_dl_port_params));
devlink_port_unregister(&bp->dl_port);
devlink_params_unregister(dl, bnxt_dl_params,
ARRAY_SIZE(bnxt_dl_params));
if (BNXT_PF(bp)) {
devlink_port_params_unregister(&bp->dl_port,
bnxt_dl_port_params,
ARRAY_SIZE(bnxt_dl_port_params));
devlink_port_unregister(&bp->dl_port);
devlink_params_unregister(dl, bnxt_dl_params,
ARRAY_SIZE(bnxt_dl_params));
}
devlink_unregister(dl);
devlink_free(dl);
}

Просмотреть файл

@ -58,6 +58,8 @@ struct bnxt_dl_nvm_param {
void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event);
void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy);
void bnxt_dl_fw_reporters_create(struct bnxt *bp);
void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all);
int bnxt_dl_register(struct bnxt *bp);
void bnxt_dl_unregister(struct bnxt *bp);

Просмотреть файл

@ -3071,8 +3071,15 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, int msg_len,
}
}
if (info->dest_buf)
memcpy(info->dest_buf + off, dma_buf, len);
if (info->dest_buf) {
if ((info->seg_start + off + len) <=
BNXT_COREDUMP_BUF_LEN(info->buf_len)) {
memcpy(info->dest_buf + off, dma_buf, len);
} else {
rc = -ENOBUFS;
break;
}
}
if (cmn_req->req_type ==
cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE))
@ -3126,7 +3133,7 @@ static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id,
static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
u16 segment_id, u32 *seg_len,
void *buf, u32 offset)
void *buf, u32 buf_len, u32 offset)
{
struct hwrm_dbg_coredump_retrieve_input req = {0};
struct bnxt_hwrm_dbg_dma_info info = {NULL};
@ -3141,8 +3148,11 @@ static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
seq_no);
info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output,
data_len);
if (buf)
if (buf) {
info.dest_buf = buf + offset;
info.buf_len = buf_len;
info.seg_start = offset;
}
rc = bnxt_hwrm_dbg_dma_data(bp, &req, sizeof(req), &info);
if (!rc)
@ -3232,14 +3242,17 @@ bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record,
static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
{
u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output);
u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0;
struct coredump_segment_record *seg_record = NULL;
u32 offset = 0, seg_hdr_len, seg_record_len;
struct bnxt_coredump_segment_hdr seg_hdr;
struct bnxt_coredump coredump = {NULL};
time64_t start_time;
u16 start_utc;
int rc = 0, i;
if (buf)
buf_len = *dump_len;
start_time = ktime_get_real_seconds();
start_utc = sys_tz.tz_minuteswest * 60;
seg_hdr_len = sizeof(seg_hdr);
@ -3272,6 +3285,12 @@ static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
u32 duration = 0, seg_len = 0;
unsigned long start, end;
if (buf && ((offset + seg_hdr_len) >
BNXT_COREDUMP_BUF_LEN(buf_len))) {
rc = -ENOBUFS;
goto err;
}
start = jiffies;
rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id);
@ -3284,9 +3303,11 @@ static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
/* Write segment data into the buffer */
rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id,
&seg_len, buf,
&seg_len, buf, buf_len,
offset + seg_hdr_len);
if (rc)
if (rc && rc == -ENOBUFS)
goto err;
else if (rc)
netdev_err(bp->dev,
"Failed to retrieve coredump for seg = %d\n",
seg_record->segment_id);
@ -3316,7 +3337,8 @@ err:
rc);
kfree(coredump.data);
*dump_len += sizeof(struct bnxt_coredump_record);
if (rc == -ENOBUFS)
netdev_err(bp->dev, "Firmware returned large coredump buffer");
return rc;
}

Просмотреть файл

@ -31,6 +31,8 @@ struct bnxt_coredump {
u16 total_segs;
};
#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record))
struct bnxt_hwrm_dbg_dma_info {
void *dest_buf;
int dest_buf_size;
@ -38,6 +40,8 @@ struct bnxt_hwrm_dbg_dma_info {
u16 seq_off;
u16 data_len_off;
u16 segs;
u32 seg_start;
u32 buf_len;
};
struct hwrm_dbg_cmn_input {

Просмотреть файл

@ -113,8 +113,10 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
{
struct net_device *dev = edev->net;
struct bnxt *bp = netdev_priv(dev);
struct bnxt_hw_resc *hw_resc;
int max_idx, max_cp_rings;
int avail_msix, idx;
int total_vecs;
int rc = 0;
ASSERT_RTNL();
@ -142,7 +144,10 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
}
edev->ulp_tbl[ulp_id].msix_base = idx;
edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
if (bp->total_irqs < (idx + avail_msix)) {
hw_resc = &bp->hw_resc;
total_vecs = idx + avail_msix;
if (bp->total_irqs < total_vecs ||
(BNXT_NEW_RM(bp) && hw_resc->resv_irqs < total_vecs)) {
if (netif_running(dev)) {
bnxt_close_nic(bp, true, false);
rc = bnxt_open_nic(bp, true, false);
@ -156,7 +161,6 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
}
if (BNXT_NEW_RM(bp)) {
struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
int resv_msix;
resv_msix = hw_resc->resv_irqs - bp->cp_nr_rings;