pds_core: add devlink health facilities
Add devlink health reporting on top of our fw watchdog.

Example:
  # devlink health show pci/0000:2b:00.0 reporter fw
  pci/0000:2b:00.0:
    reporter fw
      state healthy error 0 recover 0
  # devlink health diagnose pci/0000:2b:00.0 reporter fw
   Status: healthy State: 1 Generation: 0 Recoveries: 0

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Родитель
c2dbb09043
Коммит
25b450c05a
|
@ -26,6 +26,18 @@ messages such as these::
|
|||
pds_core 0000:b6:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link)
|
||||
pds_core 0000:b6:00.0: FW: 1.60.0-73
|
||||
|
||||
Health Reporters
|
||||
================
|
||||
|
||||
The driver supports a devlink health reporter for FW status::
|
||||
|
||||
# devlink health show pci/0000:2b:00.0 reporter fw
|
||||
pci/0000:2b:00.0:
|
||||
reporter fw
|
||||
state healthy error 0 recover 0
|
||||
# devlink health diagnose pci/0000:2b:00.0 reporter fw
|
||||
Status: healthy State: 1 Generation: 0 Recoveries: 0
|
||||
|
||||
Support
|
||||
=======
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
obj-$(CONFIG_PDS_CORE) := pds_core.o
|
||||
|
||||
pds_core-y := main.o \
|
||||
devlink.o \
|
||||
dev.o \
|
||||
core.o
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
int pdsc_setup(struct pdsc *pdsc, bool init)
|
||||
{
|
||||
int err = 0;
|
||||
int err;
|
||||
|
||||
if (init)
|
||||
err = pdsc_dev_init(pdsc);
|
||||
|
@ -42,6 +42,8 @@ static void pdsc_fw_down(struct pdsc *pdsc)
|
|||
return;
|
||||
}
|
||||
|
||||
devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
|
||||
|
||||
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
|
||||
}
|
||||
|
||||
|
@ -58,6 +60,10 @@ static void pdsc_fw_up(struct pdsc *pdsc)
|
|||
if (err)
|
||||
goto err_out;
|
||||
|
||||
pdsc->fw_recoveries++;
|
||||
devlink_health_reporter_state_update(pdsc->fw_reporter,
|
||||
DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
|
||||
|
||||
return;
|
||||
|
||||
err_out:
|
||||
|
|
|
@ -68,6 +68,8 @@ struct pdsc {
|
|||
struct timer_list wdtimer;
|
||||
unsigned int wdtimer_period;
|
||||
struct work_struct health_work;
|
||||
struct devlink_health_reporter *fw_reporter;
|
||||
u32 fw_recoveries;
|
||||
|
||||
struct pdsc_devinfo dev_info;
|
||||
struct pds_core_dev_identity dev_ident;
|
||||
|
@ -88,6 +90,10 @@ struct pdsc {
|
|||
u64 __iomem *kern_dbpage;
|
||||
};
|
||||
|
||||
int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
|
||||
struct devlink_fmsg *fmsg,
|
||||
struct netlink_ext_ack *extack);
|
||||
|
||||
void pdsc_debugfs_create(void);
|
||||
void pdsc_debugfs_destroy(void);
|
||||
void pdsc_debugfs_add_dev(struct pdsc *pdsc);
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright(c) 2023 Advanced Micro Devices, Inc */
|
||||
|
||||
#include "core.h"
|
||||
|
||||
int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
|
||||
struct devlink_fmsg *fmsg,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct pdsc *pdsc = devlink_health_reporter_priv(reporter);
|
||||
int err;
|
||||
|
||||
mutex_lock(&pdsc->config_lock);
|
||||
|
||||
if (test_bit(PDSC_S_FW_DEAD, &pdsc->state))
|
||||
err = devlink_fmsg_string_pair_put(fmsg, "Status", "dead");
|
||||
else if (!pdsc_is_fw_good(pdsc))
|
||||
err = devlink_fmsg_string_pair_put(fmsg, "Status", "unhealthy");
|
||||
else
|
||||
err = devlink_fmsg_string_pair_put(fmsg, "Status", "healthy");
|
||||
|
||||
mutex_unlock(&pdsc->config_lock);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "State",
|
||||
pdsc->fw_status &
|
||||
~PDS_CORE_FW_STS_F_GENERATION);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "Generation",
|
||||
pdsc->fw_generation >> 4);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return devlink_fmsg_u32_pair_put(fmsg, "Recoveries",
|
||||
pdsc->fw_recoveries);
|
||||
}
|
|
@ -130,10 +130,16 @@ static int pdsc_init_vf(struct pdsc *vf)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static const struct devlink_health_reporter_ops pdsc_fw_reporter_ops = {
|
||||
.name = "fw",
|
||||
.diagnose = pdsc_fw_reporter_diagnose,
|
||||
};
|
||||
|
||||
#define PDSC_WQ_NAME_LEN 24
|
||||
|
||||
static int pdsc_init_pf(struct pdsc *pdsc)
|
||||
{
|
||||
struct devlink_health_reporter *hr;
|
||||
char wq_name[PDSC_WQ_NAME_LEN];
|
||||
struct devlink *dl;
|
||||
int err;
|
||||
|
@ -172,6 +178,16 @@ static int pdsc_init_pf(struct pdsc *pdsc)
|
|||
|
||||
dl = priv_to_devlink(pdsc);
|
||||
devl_lock(dl);
|
||||
|
||||
hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc);
|
||||
if (IS_ERR(hr)) {
|
||||
dev_warn(pdsc->dev, "Failed to create fw reporter: %pe\n", hr);
|
||||
err = PTR_ERR(hr);
|
||||
devl_unlock(dl);
|
||||
goto err_out_teardown;
|
||||
}
|
||||
pdsc->fw_reporter = hr;
|
||||
|
||||
devl_register(dl);
|
||||
devl_unlock(dl);
|
||||
|
||||
|
@ -180,6 +196,8 @@ static int pdsc_init_pf(struct pdsc *pdsc)
|
|||
|
||||
return 0;
|
||||
|
||||
err_out_teardown:
|
||||
pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
|
||||
err_out_unmap_bars:
|
||||
mutex_unlock(&pdsc->config_lock);
|
||||
del_timer_sync(&pdsc->wdtimer);
|
||||
|
@ -283,6 +301,10 @@ static void pdsc_remove(struct pci_dev *pdev)
|
|||
dl = priv_to_devlink(pdsc);
|
||||
devl_lock(dl);
|
||||
devl_unregister(dl);
|
||||
if (pdsc->fw_reporter) {
|
||||
devl_health_reporter_destroy(pdsc->fw_reporter);
|
||||
pdsc->fw_reporter = NULL;
|
||||
}
|
||||
devl_unlock(dl);
|
||||
|
||||
if (!pdev->is_virtfn) {
|
||||
|
|
Загрузка…
Ссылка в новой задаче