habanalabs: add uapi to retrieve engines status

Currently, to get the engines status, the user needs to read a debugfs
file, which requires root permissions.

This new uapi allows user space apps to retrieve the status, so that, for
example, in case of failure, the status can be retrieved immediately by
the application itself, which runs without root permissions.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Dani Liberman 2022-07-09 12:34:17 +03:00 коммит произвёл Oded Gabbay
Родитель 5f92c1e296
Коммит f018c54e3d
3 изменённых файлов: 50 добавлений и 2 удалений

Просмотреть файл

@ -17,7 +17,6 @@
#define MMU_ASID_BUF_SIZE 10
#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
#define I2C_MAX_TRANSACTION_LEN 8
#define ENGINES_DATA_MAX_SIZE SZ_16K
static struct dentry *hl_debug_root;
@ -626,7 +625,7 @@ static int engines_show(struct seq_file *s, void *data)
}
eng_data.actual_size = 0;
eng_data.allocated_buf_size = ENGINES_DATA_MAX_SIZE;
eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE;
eng_data.buf = vmalloc(eng_data.allocated_buf_size);
if (!eng_data.buf)
return -ENOMEM;

Просмотреть файл

@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
@ -697,6 +698,42 @@ static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args)
return 0;
}
static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
u32 status_buf_size = args->return_size;
struct hl_device *hdev = hpriv->hdev;
struct engines_data eng_data;
int rc;
if ((status_buf_size < SZ_1K) || (status_buf_size > HL_ENGINES_DATA_MAX_SIZE) || (!out))
return -EINVAL;
eng_data.actual_size = 0;
eng_data.allocated_buf_size = status_buf_size;
eng_data.buf = vmalloc(status_buf_size);
if (!eng_data.buf)
return -ENOMEM;
hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
if (eng_data.actual_size > eng_data.allocated_buf_size) {
dev_err(hdev->dev,
"Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
eng_data.actual_size, status_buf_size);
vfree(eng_data.buf);
return -ENOMEM;
}
args->user_buffer_actual_size = eng_data.actual_size;
rc = copy_to_user(out, eng_data.buf, min_t(size_t, status_buf_size, eng_data.actual_size)) ?
-EFAULT : 0;
vfree(eng_data.buf);
return rc;
}
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@ -812,6 +849,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_UNREGISTER_EVENTFD:
return eventfd_unregister(hpriv, args);
case HL_INFO_ENGINE_STATUS:
return engine_status_info(hpriv, args);
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -EINVAL;

Просмотреть файл

@ -787,10 +787,14 @@ enum hl_server_type {
#define HL_INFO_UNREGISTER_EVENTFD 29
#define HL_INFO_GET_EVENTS 30
#define HL_INFO_UNDEFINED_OPCODE_EVENT 31
#define HL_INFO_ENGINE_STATUS 32
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
/* Maximum buffer size for retrieving engines status */
#define HL_ENGINES_DATA_MAX_SIZE SZ_1M
/**
* struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
* @sram_base_address: The first SRAM physical base address that is free to be
@ -1130,6 +1134,10 @@ enum gaudi_dcores {
* resolution. Currently not in use.
* @pll_index: Index as defined in hl_<asic type>_pll_index enumeration.
* @eventfd: event file descriptor for event notifications.
* @user_buffer_actual_size: Actual size of the data copied by the driver to the user-allocated
* buffer. The user may allocate a buffer larger than needed, so this
* field reports the exact number of bytes the kernel copied into the
* buffer.
* @pad: Padding to 64 bit.
*/
struct hl_info_args {
@ -1143,6 +1151,7 @@ struct hl_info_args {
__u32 period_ms;
__u32 pll_index;
__u32 eventfd;
__u32 user_buffer_actual_size;
};
__u32 pad;