habanalabs: add security violations dump to debugfs

In order to improve driver security debuggability, we add
security violations dump to debugfs.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ofir Bitton 2021-01-07 12:14:17 +02:00 коммит произвёл Oded Gabbay
Родитель eea4c2557c
Коммит d2b980f329
9 изменённых файлов: 48 добавлений и 2 удалений

Просмотреть файл

@ -182,3 +182,11 @@ KernelVersion: 5.6
Contact: oded.gabbay@gmail.com
Description: Sets the stop-on-error option for the device engines. Value of
"0" is for disable, otherwise enable.
What: /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
Date: Jan 2021
KernelVersion: 5.12
Contact: oded.gabbay@gmail.com
Description: Dumps all security violations to dmesg. This will also ack
all security violations, meaning those violations will not be
dumped the next time the user calls this API.

Просмотреть файл

@ -867,6 +867,17 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
return count;
}
/*
 * hl_security_violations_read - debugfs read handler that triggers the
 * ASIC-specific protection-bits acknowledgement.
 * @f: debugfs file; its inode's i_private holds the hl_dbg_device_entry.
 * @buf: user buffer (nothing is copied into it).
 * @count: requested read size (ignored).
 * @ppos: file position (ignored).
 *
 * Calls the device's ack_protection_bits_errors() callback and then reports
 * zero bytes read.
 *
 * Return: always 0.
 */
static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *dbg_entry;
	struct hl_device *device;

	dbg_entry = file_inode(f)->i_private;
	device = dbg_entry->hdev;

	/* The read itself is only a trigger; the callback does the work. */
	device->asic_funcs->ack_protection_bits_errors(device);

	return 0;
}
static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read32,
@ -924,6 +935,11 @@ static const struct file_operations hl_stop_on_err_fops = {
.write = hl_stop_on_err_write
};
/*
 * File operations backing the security-violations debugfs node.
 * Only a read handler is provided; no write handler is set.
 */
static const struct file_operations hl_security_violations_fops = {
	.read = hl_security_violations_read,
	.owner = THIS_MODULE,
};
static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL},
@ -1073,6 +1089,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_stop_on_err_fops);
debugfs_create_file("dump_security_violations",
0644,
dev_entry->root,
dev_entry,
&hl_security_violations_fops);
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
ent = debugfs_create_file(hl_debugfs_list[i].name,

Просмотреть файл

@ -850,6 +850,7 @@ enum div_select_defs {
* @collective_wait_create_jobs: allocate collective wait cs jobs
* @scramble_vaddr: Routine to scramble the virtual address prior to mapping it
* in the MMU.
* @ack_protection_bits_errors: ack and dump all security violations
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@ -960,6 +961,7 @@ struct hl_asic_funcs {
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
u32 collective_engine_id);
u64 (*scramble_vaddr)(struct hl_device *hdev, u64 virt_addr);
void (*ack_protection_bits_errors)(struct hl_device *hdev);
};

Просмотреть файл

@ -8546,7 +8546,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
.get_device_time = gaudi_get_device_time,
.collective_wait_init_cs = gaudi_collective_wait_init_cs,
.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
.scramble_vaddr = hl_mmu_scramble_vaddr
.scramble_vaddr = hl_mmu_scramble_vaddr,
.ack_protection_bits_errors = gaudi_ack_protection_bits_errors
};
/**

Просмотреть файл

@ -335,6 +335,7 @@ struct gaudi_device {
};
void gaudi_init_security(struct hl_device *hdev);
void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
void gaudi_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);

Просмотреть файл

@ -13052,3 +13052,8 @@ void gaudi_init_security(struct hl_device *hdev)
gaudi_init_protection_bits(hdev);
}
/*
 * gaudi_ack_protection_bits_errors - Gaudi implementation of the
 * ack_protection_bits_errors ASIC callback.
 * @hdev: habanalabs device structure (unused here).
 *
 * Currently an empty stub: the body performs no work, so nothing is
 * acknowledged or printed for Gaudi devices.
 */
void gaudi_ack_protection_bits_errors(struct hl_device *hdev)
{
}

Просмотреть файл

@ -5457,7 +5457,8 @@ static const struct hl_asic_funcs goya_funcs = {
.get_device_time = goya_get_device_time,
.collective_wait_init_cs = goya_collective_wait_init_cs,
.collective_wait_create_jobs = goya_collective_wait_create_jobs,
.scramble_vaddr = hl_mmu_scramble_vaddr
.scramble_vaddr = hl_mmu_scramble_vaddr,
.ack_protection_bits_errors = goya_ack_protection_bits_errors
};
/*

Просмотреть файл

@ -173,6 +173,7 @@ void goya_init_mme_qmans(struct hl_device *hdev);
void goya_init_tpc_qmans(struct hl_device *hdev);
int goya_init_cpu_queues(struct hl_device *hdev);
void goya_init_security(struct hl_device *hdev);
void goya_ack_protection_bits_errors(struct hl_device *hdev);
int goya_late_init(struct hl_device *hdev);
void goya_late_fini(struct hl_device *hdev);

Просмотреть файл

@ -3120,3 +3120,8 @@ void goya_init_security(struct hl_device *hdev)
goya_init_protection_bits(hdev);
}
/*
 * goya_ack_protection_bits_errors - Goya implementation of the
 * ack_protection_bits_errors ASIC callback.
 * @hdev: habanalabs device structure (unused here).
 *
 * Currently an empty stub: the body performs no work, so nothing is
 * acknowledged or printed for Goya devices.
 */
void goya_ack_protection_bits_errors(struct hl_device *hdev)
{
}