net/mlx5: Create FW devlink_health_reporter

Create an mlx5 devlink_health_reporter for the FW (firmware) reporter. The FW
reporter implements the devlink_health_reporter diagnose callback.

The FW reporter diagnose command can be triggered at any time by the user
to check the current FW status.
In a healthy state, it returns a clear (zero) syndrome. Otherwise, it
returns the syndrome and a description of the error type.

Command example and output on healthy status:
$ devlink health diagnose pci/0000:82:00.0 reporter fw
Syndrome: 0

Command example and output on unhealthy status:
$ devlink health diagnose pci/0000:82:00.0 reporter fw
Syndrome: 8 Description: unrecoverable hardware error

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
This commit is contained in:
Moshe Shemesh 2018-12-11 16:09:53 +02:00 коммит произвёл Saeed Mahameed
Родитель 3e5b72ac2f
Коммит 1e34f3efd4
2 изменённых файлов: 50 добавлений и 0 удалений

Просмотреть файл

@ -388,6 +388,51 @@ static void print_health_info(struct mlx5_core_dev *dev)
mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw);
}
static int
mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
struct devlink_fmsg *fmsg)
{
struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
struct mlx5_core_health *health = &dev->priv.health;
struct health_buffer __iomem *h = health->health;
u8 synd;
int err;
synd = ioread8(&h->synd);
err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
if (err || !synd)
return err;
return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
}
/*
 * Callback table for the "fw" devlink health reporter. Only the diagnose
 * callback is set here.
 */
static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
.name = "fw",
.diagnose = mlx5_fw_reporter_diagnose,
};
/*
 * Create the "fw" devlink health reporter for @dev and store the result in
 * dev->priv.health.fw_reporter.
 *
 * Failure is not fatal: a warning is logged and the error pointer is left
 * in fw_reporter, so users of that field must check it with IS_ERR().
 * @dev is handed to devlink as the last argument; the diagnose callback
 * reads it back through devlink_health_reporter_priv().
 */
static void mlx5_fw_reporter_create(struct mlx5_core_dev *dev)
{
	struct devlink_health_reporter *rep;

	rep = devlink_health_reporter_create(priv_to_devlink(dev),
					     &mlx5_fw_reporter_ops,
					     0, false, dev);
	dev->priv.health.fw_reporter = rep;
	if (!IS_ERR(rep))
		return;

	mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
		       PTR_ERR(rep));
}
/*
 * Destroy the "fw" devlink health reporter of @dev, if there is one.
 *
 * Does nothing when fw_reporter is NULL or holds an error pointer.
 */
static void mlx5_fw_reporter_destroy(struct mlx5_core_dev *dev)
{
	struct devlink_health_reporter *rep = dev->priv.health.fw_reporter;

	if (!IS_ERR_OR_NULL(rep))
		devlink_health_reporter_destroy(rep);
}
static unsigned long get_next_poll_jiffies(void)
{
unsigned long next;
@ -498,6 +543,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev)
struct mlx5_core_health *health = &dev->priv.health;
destroy_workqueue(health->wq);
mlx5_fw_reporter_destroy(dev);
}
int mlx5_health_init(struct mlx5_core_dev *dev)
@ -519,5 +565,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
spin_lock_init(&health->wq_lock);
INIT_WORK(&health->work, health_care);
mlx5_fw_reporter_create(dev);
return 0;
}

Просмотреть файл

@ -53,6 +53,7 @@
#include <linux/mlx5/eq.h>
#include <linux/timecounter.h>
#include <linux/ptp_clock_kernel.h>
#include <net/devlink.h>
enum {
MLX5_BOARD_ID_LEN = 64,
@ -443,6 +444,7 @@ struct mlx5_core_health {
unsigned long flags;
struct work_struct work;
struct delayed_work recover_work;
struct devlink_health_reporter *fw_reporter;
};
struct mlx5_qp_table {