diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 61474627a32c..e1e4115dcf78 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -53,6 +53,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_int_process_v9.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ + $(AMDKFD_PATH)/kfd_smi_events.o \ $(AMDKFD_PATH)/kfd_crat.o ifneq ($(CONFIG_AMD_IOMMU_V2),) diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 9f59ba93cfe0..24b471734117 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -24,6 +24,7 @@ #include "kfd_events.h" #include "cik_int.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { struct kfd_vm_fault_info info; + kfd_smi_event_update_vmfault(dev, pasid); kfd_process_vm_fault(dev->dqm, pasid); memset(&info, 0, sizeof(info)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index cf0017f4d9d5..e9b96ad3d9a5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -39,6 +39,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -1740,6 +1741,20 @@ err_unlock: return r; } +/* Handle requests for watching SMI events */ +static int kfd_ioctl_smi_events(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_smi_events_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpuid); + if (!dev) + return -EINVAL; + + return kfd_smi_event_open(dev, &args->anon_fd); +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -1835,6 +1850,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, kfd_ioctl_alloc_queue_gws, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, + kfd_ioctl_smi_events, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3a646e757560..4bfedaab183f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -635,6 +635,11 @@ static int kfd_gws_init(struct kfd_dev *kfd) return ret; } +static void kfd_smi_init(struct kfd_dev *dev) { + INIT_LIST_HEAD(&dev->smi_clients); + spin_lock_init(&dev->smi_lock); +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) @@ -749,6 +754,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_topology_add_device_error; } + kfd_smi_init(kfd); + kfd->init_complete = true; dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index fce6ccabe38b..241bd6ff79f4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -24,6 +24,7 @@ #include "kfd_events.h" #include "soc15_int.h" #include "kfd_device_queue_manager.h" +#include "kfd_smi_events.h" static bool event_interrupt_isr_v9(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -117,6 +118,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, info.prot_read = ring_id & 0x10; info.prot_write = ring_id & 0x20; + kfd_smi_event_update_vmfault(dev, pasid); kfd_process_vm_fault(dev->dqm, pasid); kfd_signal_vm_fault_event(dev, pasid, &info); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 67b9c398f0e4..6727e9de5b8b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -305,6 +305,10 @@ struct kfd_dev { /* Global GWS resource shared between processes */ void *gws; + + /* Clients watching SMI events */ + struct list_head smi_clients; + spinlock_t smi_lock; }; enum kfd_mempool { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c new file mode 100644 index 000000000000..bfc9330df987 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -0,0 +1,215 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include "amdgpu_vm.h" +#include "kfd_priv.h" +#include "kfd_smi_events.h" + +struct kfd_smi_client { + struct list_head list; + struct kfifo fifo; + wait_queue_head_t wait_queue; + /* events enabled */ + uint64_t events; + struct kfd_dev *dev; + spinlock_t lock; +}; + +#define MAX_KFIFO_SIZE 1024 + +static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *); +static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *); +static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t, + loff_t *); +static int kfd_smi_ev_release(struct inode *, struct file *); + +static const char kfd_smi_name[] = "kfd_smi_ev"; + +static const struct file_operations kfd_smi_ev_fops = { + .owner = THIS_MODULE, + .poll = kfd_smi_ev_poll, + .read = kfd_smi_ev_read, + .write = kfd_smi_ev_write, + .release = kfd_smi_ev_release +}; + +static __poll_t kfd_smi_ev_poll(struct file *filep, + struct poll_table_struct *wait) +{ + struct kfd_smi_client *client = filep->private_data; + __poll_t mask = 0; + + poll_wait(filep, &client->wait_queue, wait); + + spin_lock(&client->lock); + if (!kfifo_is_empty(&client->fifo)) + mask = EPOLLIN | EPOLLRDNORM; + spin_unlock(&client->lock); + + return mask; +} + +static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user, + size_t size, loff_t *offset) +{ + int ret; + size_t to_copy; + struct kfd_smi_client *client = filep->private_data; + unsigned char buf[MAX_KFIFO_SIZE]; + + BUILD_BUG_ON(MAX_KFIFO_SIZE > 1024); + + /* kfifo_to_user can sleep so we can't use spinlock protection around + * it. Instead, we kfifo out as spinlocked then copy them to the user. + */ + spin_lock(&client->lock); + to_copy = kfifo_len(&client->fifo); + if (!to_copy) { + spin_unlock(&client->lock); + return -EAGAIN; + } + to_copy = min3(size, sizeof(buf), to_copy); + ret = kfifo_out(&client->fifo, buf, to_copy); + spin_unlock(&client->lock); + if (ret <= 0) + return -EAGAIN; + + ret = copy_to_user(user, buf, to_copy); + if (ret) + return -EFAULT; + + return to_copy; +} + +static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user, + size_t size, loff_t *offset) +{ + struct kfd_smi_client *client = filep->private_data; + uint64_t events; + + if (!access_ok(user, size) || size < sizeof(events)) + return -EFAULT; + if (copy_from_user(&events, user, sizeof(events))) + return -EFAULT; + + WRITE_ONCE(client->events, events); + + return sizeof(events); +} + +static int kfd_smi_ev_release(struct inode *inode, struct file *filep) +{ + struct kfd_smi_client *client = filep->private_data; + struct kfd_dev *dev = client->dev; + + spin_lock(&dev->smi_lock); + list_del_rcu(&client->list); + spin_unlock(&dev->smi_lock); + + synchronize_rcu(); + kfifo_free(&client->fifo); + kfree(client); + + return 0; +} + +void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd; + struct amdgpu_task_info task_info; + /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */ + /* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43 + */ + char fifo_in[43]; + struct kfd_smi_client *client; + int len; + + if (list_empty(&dev->smi_clients)) + return; + + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); + amdgpu_vm_get_task_info(adev, pasid, &task_info); + /* Report VM faults from user applications, not retry from kernel */ + if (!task_info.pid) + return; + + len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT, + task_info.pid, task_info.task_name); + + rcu_read_lock(); + + list_for_each_entry_rcu(client, &dev->smi_clients, list) { + if (!(READ_ONCE(client->events) & KFD_SMI_EVENT_VMFAULT)) + continue; + spin_lock(&client->lock); + if (kfifo_avail(&client->fifo) >= len) { + kfifo_in(&client->fifo, fifo_in, len); + wake_up_all(&client->wait_queue); + } + else + pr_debug("smi_event(vmfault): no space left\n"); + spin_unlock(&client->lock); + } + + rcu_read_unlock(); +} + +int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) +{ + struct kfd_smi_client *client; + int ret; + + client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL); + if (!client) + return -ENOMEM; + INIT_LIST_HEAD(&client->list); + + ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL); + if (ret) { + kfree(client); + return ret; + } + + ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, + O_RDWR); + if (ret < 0) { + kfifo_free(&client->fifo); + kfree(client); + return ret; + } + *fd = ret; + + init_waitqueue_head(&client->wait_queue); + spin_lock_init(&client->lock); + client->events = 0; + client->dev = dev; + + spin_lock(&dev->smi_lock); + list_add_rcu(&client->list, &dev->smi_clients); + spin_unlock(&dev->smi_lock); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h new file mode 100644 index 000000000000..a9cb218fef96 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -0,0 +1,29 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KFD_SMI_EVENTS_H_INCLUDED +#define KFD_SMI_EVENTS_H_INCLUDED + +int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd); +void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); + +#endif diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index b6be62356d34..733c183d165e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -442,6 +442,17 @@ struct kfd_ioctl_import_dmabuf_args { __u32 dmabuf_fd; /* to KFD */ }; +/* + * KFD SMI(System Management Interface) events + */ +/* Event type (defined by bitmask) */ +#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001 + +struct kfd_ioctl_smi_events_args { + __u32 gpuid; /* to KFD */ + __u32 anon_fd; /* from KFD */ +}; + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -546,7 +557,10 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_ALLOC_QUEUE_GWS \ AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) +#define AMDKFD_IOC_SMI_EVENTS \ + AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1F +#define AMDKFD_COMMAND_END 0x20 #endif