diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/Makefile | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 308 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 886 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 193 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 168 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 294 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 48 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 46 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h | 290 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 24 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 18 |
15 files changed, 2279 insertions, 27 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 652d25478fd5..28551153ec6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -12,6 +12,7 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_kernel_queue_vi.o kfd_packet_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ - kfd_interrupt.o kfd_events.o cik_event_interrupt.o + kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ + kfd_dbgdev.o kfd_dbgmgr.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index b2c6109bd7af..96c904b3acb7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -35,6 +35,7 @@ #include <asm/processor.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" +#include "kfd_dbgmgr.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -432,6 +433,301 @@ out: return err; } +static int kfd_ioctl_dbg_register(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_register_args *args = data; + struct kfd_dev *dev; + struct kfd_dbgmgr *dbgmgr_ptr; + struct kfd_process_device *pdd; + bool create_ok; + long status = 0; + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { + pr_debug("kfd_ioctl_dbg_register not supported on CZ\n"); + return -EINVAL; + } + + mutex_lock(kfd_get_dbgmgr_mutex()); + mutex_lock(&p->mutex); + + /* + * make sure that we have pdd, if this the first queue created for + * this process + */ + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + mutex_unlock(&p->mutex); + mutex_unlock(kfd_get_dbgmgr_mutex()); + return PTR_ERR(pdd); + } + + if (dev->dbgmgr == NULL) { + 
/* In case of a legal call, we have no dbgmgr yet */ + create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev); + if (create_ok) { + status = kfd_dbgmgr_register(dbgmgr_ptr, p); + if (status != 0) + kfd_dbgmgr_destroy(dbgmgr_ptr); + else + dev->dbgmgr = dbgmgr_ptr; + } + } else { + pr_debug("debugger already registered\n"); + status = -EINVAL; + } + + mutex_unlock(&p->mutex); + mutex_unlock(kfd_get_dbgmgr_mutex()); + + return status; +} + +static int kfd_ioctl_dbg_unrgesiter(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_unregister_args *args = data; + struct kfd_dev *dev; + long status; + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { + pr_debug("kfd_ioctl_dbg_unrgesiter not supported on CZ\n"); + return -EINVAL; + } + + mutex_lock(kfd_get_dbgmgr_mutex()); + + status = kfd_dbgmgr_unregister(dev->dbgmgr, p); + if (status == 0) { + kfd_dbgmgr_destroy(dev->dbgmgr); + dev->dbgmgr = NULL; + } + + mutex_unlock(kfd_get_dbgmgr_mutex()); + + return status; +} + +/* + * Parse and generate variable size data structure for address watch. + * Total size of the buffer and # watch points is limited in order + * to prevent kernel abuse. (no bearing to the much smaller HW limitation + * which is enforced by dbgdev module) + * please also note that the watch address itself are not "copied from user", + * since it be set into the HW in user mode values. 
+ * + */ +static int kfd_ioctl_dbg_address_watch(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_address_watch_args *args = data; + struct kfd_dev *dev; + struct dbg_address_watch_info aw_info; + unsigned char *args_buff; + long status; + void __user *cmd_from_user; + uint64_t watch_mask_value = 0; + unsigned int args_idx = 0; + + memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { + pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); + return -EINVAL; + } + + cmd_from_user = (void __user *) args->content_ptr; + + /* Validate arguments */ + + if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) || + (args->buf_size_in_bytes <= sizeof(*args)) || + (cmd_from_user == NULL)) + return -EINVAL; + + /* this is the actual buffer to work with */ + + args_buff = kmalloc(args->buf_size_in_bytes - + sizeof(*args), GFP_KERNEL); + if (args_buff == NULL) + return -ENOMEM; + + status = copy_from_user(args_buff, cmd_from_user, + args->buf_size_in_bytes - sizeof(*args)); + + if (status != 0) { + pr_debug("Failed to copy address watch user data\n"); + kfree(args_buff); + return -EINVAL; + } + + aw_info.process = p; + + aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx])); + args_idx += sizeof(aw_info.num_watch_points); + + aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; + args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points; + + /* + * set watch address base pointer to point on the array base + * within args_buff + */ + aw_info.watch_address = (uint64_t *) &args_buff[args_idx]; + + /* skip over the addresses buffer */ + args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; + + if (args_idx >= args->buf_size_in_bytes) { + kfree(args_buff); + return -EINVAL; + } + + watch_mask_value = (uint64_t) args_buff[args_idx]; + + 
if (watch_mask_value > 0) { + /* + * There is an array of masks. + * set watch mask base pointer to point on the array base + * within args_buff + */ + aw_info.watch_mask = (uint64_t *) &args_buff[args_idx]; + + /* skip over the masks buffer */ + args_idx += sizeof(aw_info.watch_mask) * + aw_info.num_watch_points; + } else { + /* just the NULL mask, set to NULL and skip over it */ + aw_info.watch_mask = NULL; + args_idx += sizeof(aw_info.watch_mask); + } + + if (args_idx > args->buf_size_in_bytes) { + kfree(args_buff); + return -EINVAL; + } + + /* Currently HSA Event is not supported for DBG */ + aw_info.watch_event = NULL; + + mutex_lock(kfd_get_dbgmgr_mutex()); + + status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info); + + mutex_unlock(kfd_get_dbgmgr_mutex()); + + kfree(args_buff); + + return status; +} + +/* Parse and generate fixed size data structure for wave control */ +static int kfd_ioctl_dbg_wave_control(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_wave_control_args *args = data; + struct kfd_dev *dev; + struct dbg_wave_control_info wac_info; + unsigned char *args_buff; + uint32_t computed_buff_size; + long status; + void __user *cmd_from_user; + unsigned int args_idx = 0; + + memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info)); + + /* we use compact form, independent of the packing attribute value */ + computed_buff_size = sizeof(*args) + + sizeof(wac_info.mode) + + sizeof(wac_info.operand) + + sizeof(wac_info.dbgWave_msg.DbgWaveMsg) + + sizeof(wac_info.dbgWave_msg.MemoryVA) + + sizeof(wac_info.trapId); + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { + pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); + return -EINVAL; + } + + /* input size must match the computed "compact" size */ + if (args->buf_size_in_bytes != computed_buff_size) { + pr_debug("size mismatch, computed : actual %u : %u\n", + 
args->buf_size_in_bytes, computed_buff_size); + return -EINVAL; + } + + cmd_from_user = (void __user *) args->content_ptr; + + if (cmd_from_user == NULL) + return -EINVAL; + + /* this is the actual buffer to work with */ + + args_buff = kmalloc(args->buf_size_in_bytes - sizeof(*args), + GFP_KERNEL); + + if (args_buff == NULL) + return -ENOMEM; + + /* Now copy the entire buffer from user */ + status = copy_from_user(args_buff, cmd_from_user, + args->buf_size_in_bytes - sizeof(*args)); + if (status != 0) { + pr_debug("Failed to copy wave control user data\n"); + kfree(args_buff); + return -EINVAL; + } + + /* move ptr to the start of the "pay-load" area */ + wac_info.process = p; + + wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx])); + args_idx += sizeof(wac_info.operand); + + wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); + args_idx += sizeof(wac_info.mode); + + wac_info.trapId = *((uint32_t *)(&args_buff[args_idx])); + args_idx += sizeof(wac_info.trapId); + + wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = + *((uint32_t *)(&args_buff[args_idx])); + wac_info.dbgWave_msg.MemoryVA = NULL; + + mutex_lock(kfd_get_dbgmgr_mutex()); + + pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n", + wac_info.process, wac_info.operand, + wac_info.mode, wac_info.trapId, + wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + + status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info); + + pr_debug("Returned status of dbg manager is %ld\n", status); + + mutex_unlock(kfd_get_dbgmgr_mutex()); + + kfree(args_buff); + + return status; +} + static int kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void *data) { @@ -612,6 +908,18 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, kfd_ioctl_wait_events, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER, + kfd_ioctl_dbg_register, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER, + 
kfd_ioctl_dbg_unrgesiter, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH, + kfd_ioctl_dbg_address_watch, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, + kfd_ioctl_dbg_wave_control, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c new file mode 100644 index 000000000000..96153f28d73f --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -0,0 +1,886 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/mutex.h> +#include <linux/device.h> + +#include "kfd_pm4_headers.h" +#include "kfd_pm4_headers_diq.h" +#include "kfd_kernel_queue.h" +#include "kfd_priv.h" +#include "kfd_pm4_opcodes.h" +#include "cik_regs.h" +#include "kfd_dbgmgr.h" +#include "kfd_dbgdev.h" +#include "kfd_device_queue_manager.h" +#include "../../radeon/cik_reg.h" + +static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) +{ + BUG_ON(!dev || !dev->kfd2kgd); + + dev->kfd2kgd->address_watch_disable(dev->kgd); +} + +static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + unsigned int pasid, uint64_t vmid0_address, + uint32_t *packet_buff, size_t size_in_bytes) +{ + struct pm4__release_mem *rm_packet; + struct pm4__indirect_buffer_pasid *ib_packet; + struct kfd_mem_obj *mem_obj; + size_t pq_packets_size_in_bytes; + union ULARGE_INTEGER *largep; + union ULARGE_INTEGER addr; + struct kernel_queue *kq; + uint64_t *rm_state; + unsigned int *ib_packet_buff; + int status; + + BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes); + + kq = dbgdev->kq; + + pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + + sizeof(struct pm4__indirect_buffer_pasid); + + /* + * We acquire a buffer from DIQ + * The receive packet buff will be sitting on the Indirect Buffer + * and in the PQ we put the IB packet + sync packet(s). 
+ */ + status = kq->ops.acquire_packet_buffer(kq, + pq_packets_size_in_bytes / sizeof(uint32_t), + &ib_packet_buff); + if (status != 0) { + pr_err("amdkfd: acquire_packet_buffer failed\n"); + return status; + } + + memset(ib_packet_buff, 0, pq_packets_size_in_bytes); + + ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); + + ib_packet->header.count = 3; + ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; + ib_packet->header.type = PM4_TYPE_3; + + largep = (union ULARGE_INTEGER *) &vmid0_address; + + ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; + ib_packet->bitfields3.ib_base_hi = largep->u.high_part; + + ib_packet->control = (1 << 23) | (1 << 31) | + ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); + + ib_packet->bitfields5.pasid = pasid; + + /* + * for now we use release mem for GPU-CPU synchronization + * Consider WaitRegMem + WriteData as a better alternative + * we get a GART allocations ( gpu/cpu mapping), + * for the sync variable, and wait until: + * (a) Sync with HW + * (b) Sync var is written by CP to mem. 
+ */ + rm_packet = (struct pm4__release_mem *) (ib_packet_buff + + (sizeof(struct pm4__indirect_buffer_pasid) / + sizeof(unsigned int))); + + status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), + &mem_obj); + + if (status != 0) { + pr_err("amdkfd: Failed to allocate GART memory\n"); + kq->ops.rollback_packet(kq); + return status; + } + + rm_state = (uint64_t *) mem_obj->cpu_ptr; + + *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; + + rm_packet->header.opcode = IT_RELEASE_MEM; + rm_packet->header.type = PM4_TYPE_3; + rm_packet->header.count = sizeof(struct pm4__release_mem) / + sizeof(unsigned int) - 2; + + rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + rm_packet->bitfields2.event_index = + event_index___release_mem__end_of_pipe; + + rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; + rm_packet->bitfields2.atc = 0; + rm_packet->bitfields2.tc_wb_action_ena = 1; + + addr.quad_part = mem_obj->gpu_addr; + + rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; + rm_packet->address_hi = addr.u.high_part; + + rm_packet->bitfields3.data_sel = + data_sel___release_mem__send_64_bit_data; + + rm_packet->bitfields3.int_sel = + int_sel___release_mem__send_data_after_write_confirm; + + rm_packet->bitfields3.dst_sel = + dst_sel___release_mem__memory_controller; + + rm_packet->data_lo = QUEUESTATE__ACTIVE; + + kq->ops.submit_packet(kq); + + /* Wait till CP writes sync code: */ + status = amdkfd_fence_wait_timeout( + (unsigned int *) rm_state, + QUEUESTATE__ACTIVE, 1500); + + kfd_gtt_sa_free(dbgdev->dev, mem_obj); + + return status; +} + +static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) +{ + BUG_ON(!dbgdev); + + /* + * no action is needed in this case, + * just make sure diq will not be used + */ + + dbgdev->kq = NULL; + + return 0; +} + +static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) +{ + struct queue_properties properties; + unsigned int qid; + struct kernel_queue *kq = NULL; + int status; + + 
BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev); + + status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, + &properties, 0, KFD_QUEUE_TYPE_DIQ, + &qid); + + if (status) { + pr_err("amdkfd: Failed to create DIQ\n"); + return status; + } + + pr_debug("DIQ Created with queue id: %d\n", qid); + + kq = pqm_get_kernel_queue(dbgdev->pqm, qid); + + if (kq == NULL) { + pr_err("amdkfd: Error getting DIQ\n"); + pqm_destroy_queue(dbgdev->pqm, qid); + return -EFAULT; + } + + dbgdev->kq = kq; + + return status; +} + +static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) +{ + BUG_ON(!dbgdev || !dbgdev->dev); + + /* disable watch address */ + dbgdev_address_watch_disable_nodiq(dbgdev->dev); + return 0; +} + +static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) +{ + /* todo - disable address watch */ + int status; + + BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq); + + status = pqm_destroy_queue(dbgdev->pqm, + dbgdev->kq->queue->properties.queue_id); + dbgdev->kq = NULL; + + return status; +} + +static void dbgdev_address_watch_set_registers( + const struct dbg_address_watch_info *adw_info, + union TCP_WATCH_ADDR_H_BITS *addrHi, + union TCP_WATCH_ADDR_L_BITS *addrLo, + union TCP_WATCH_CNTL_BITS *cntl, + unsigned int index, unsigned int vmid) +{ + union ULARGE_INTEGER addr; + + BUG_ON(!adw_info || !addrHi || !addrLo || !cntl); + + addr.quad_part = 0; + addrHi->u32All = 0; + addrLo->u32All = 0; + cntl->u32All = 0; + + if (adw_info->watch_mask != NULL) + cntl->bitfields.mask = + (uint32_t) (adw_info->watch_mask[index] & + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); + else + cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; + + addr.quad_part = (unsigned long long) adw_info->watch_address[index]; + + addrHi->bitfields.addr = addr.u.high_part & + ADDRESS_WATCH_REG_ADDHIGH_MASK; + addrLo->bitfields.addr = + (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); + + cntl->bitfields.mode = adw_info->watch_mode[index]; + cntl->bitfields.vmid = (uint32_t) vmid; + /* for 
now assume it is an ATC address */ + cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; + + pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); + pr_debug("\t\t%20s %08x\n", "set reg add high :", + addrHi->bitfields.addr); + pr_debug("\t\t%20s %08x\n", "set reg add low :", + addrLo->bitfields.addr); +} + +static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, + struct dbg_address_watch_info *adw_info) +{ + union TCP_WATCH_ADDR_H_BITS addrHi; + union TCP_WATCH_ADDR_L_BITS addrLo; + union TCP_WATCH_CNTL_BITS cntl; + struct kfd_process_device *pdd; + unsigned int i; + + BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); + + /* taking the vmid for that process on the safe way using pdd */ + pdd = kfd_get_process_device_data(dbgdev->dev, + adw_info->process); + if (!pdd) { + pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); + return -EFAULT; + } + + addrHi.u32All = 0; + addrLo.u32All = 0; + cntl.u32All = 0; + + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || + (adw_info->num_watch_points == 0)) { + pr_err("amdkfd: num_watch_points is invalid\n"); + return -EINVAL; + } + + if ((adw_info->watch_mode == NULL) || + (adw_info->watch_address == NULL)) { + pr_err("amdkfd: adw_info fields are not valid\n"); + return -EINVAL; + } + + for (i = 0 ; i < adw_info->num_watch_points ; i++) { + dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, + &cntl, i, pdd->qpd.vmid); + + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + pr_debug("\t\t%20s %08x\n", "register index :", i); + pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); + pr_debug("\t\t%20s %08x\n", "Address Low is :", + addrLo.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Address high is :", + addrHi.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Address high is :", + addrHi.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Control Mask is :", + cntl.bitfields.mask); + pr_debug("\t\t%20s %08x\n", "Control Mode is :", + cntl.bitfields.mode); + 
pr_debug("\t\t%20s %08x\n", "Control Vmid is :", + cntl.bitfields.vmid); + pr_debug("\t\t%20s %08x\n", "Control atc is :", + cntl.bitfields.atc); + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + + pdd->dev->kfd2kgd->address_watch_execute( + dbgdev->dev->kgd, + i, + cntl.u32All, + addrHi.u32All, + addrLo.u32All); + } + + return 0; +} + +static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, + struct dbg_address_watch_info *adw_info) +{ + struct pm4__set_config_reg *packets_vec; + union TCP_WATCH_ADDR_H_BITS addrHi; + union TCP_WATCH_ADDR_L_BITS addrLo; + union TCP_WATCH_CNTL_BITS cntl; + struct kfd_mem_obj *mem_obj; + unsigned int aw_reg_add_dword; + uint32_t *packet_buff_uint; + unsigned int i; + int status; + size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; + /* we do not control the vmid in DIQ mode, just a place holder */ + unsigned int vmid = 0; + + BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); + + addrHi.u32All = 0; + addrLo.u32All = 0; + cntl.u32All = 0; + + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || + (adw_info->num_watch_points == 0)) { + pr_err("amdkfd: num_watch_points is invalid\n"); + return -EINVAL; + } + + if ((NULL == adw_info->watch_mode) || + (NULL == adw_info->watch_address)) { + pr_err("amdkfd: adw_info fields are not valid\n"); + return -EINVAL; + } + + status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); + + if (status != 0) { + pr_err("amdkfd: Failed to allocate GART memory\n"); + return status; + } + + packet_buff_uint = mem_obj->cpu_ptr; + + memset(packet_buff_uint, 0, ib_size); + + packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); + + packets_vec[0].header.count = 1; + packets_vec[0].header.opcode = IT_SET_CONFIG_REG; + packets_vec[0].header.type = PM4_TYPE_3; + packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; + packets_vec[0].bitfields2.insert_vmid = 1; + packets_vec[1].ordinal1 = packets_vec[0].ordinal1; + packets_vec[1].bitfields2.insert_vmid = 0; + 
packets_vec[2].ordinal1 = packets_vec[0].ordinal1; + packets_vec[2].bitfields2.insert_vmid = 0; + packets_vec[3].ordinal1 = packets_vec[0].ordinal1; + packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; + packets_vec[3].bitfields2.insert_vmid = 1; + + for (i = 0; i < adw_info->num_watch_points; i++) { + dbgdev_address_watch_set_registers(adw_info, + &addrHi, + &addrLo, + &cntl, + i, + vmid); + + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + pr_debug("\t\t%20s %08x\n", "register index :", i); + pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); + pr_debug("\t\t%20s %p\n", "Add ptr is :", + adw_info->watch_address); + pr_debug("\t\t%20s %08llx\n", "Add is :", + adw_info->watch_address[i]); + pr_debug("\t\t%20s %08x\n", "Address Low is :", + addrLo.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Address high is :", + addrHi.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Control Mask is :", + cntl.bitfields.mask); + pr_debug("\t\t%20s %08x\n", "Control Mode is :", + cntl.bitfields.mode); + pr_debug("\t\t%20s %08x\n", "Control Vmid is :", + cntl.bitfields.vmid); + pr_debug("\t\t%20s %08x\n", "Control atc is :", + cntl.bitfields.atc); + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + + aw_reg_add_dword = + dbgdev->dev->kfd2kgd->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_CNTL); + + aw_reg_add_dword /= sizeof(uint32_t); + + packets_vec[0].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + + packets_vec[0].reg_data[0] = cntl.u32All; + + aw_reg_add_dword = + dbgdev->dev->kfd2kgd->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_ADDR_HI); + + aw_reg_add_dword /= sizeof(uint32_t); + + packets_vec[1].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + packets_vec[1].reg_data[0] = addrHi.u32All; + + aw_reg_add_dword = + dbgdev->dev->kfd2kgd->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_ADDR_LO); + + aw_reg_add_dword /= 
sizeof(uint32_t); + + packets_vec[2].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + packets_vec[2].reg_data[0] = addrLo.u32All; + + /* enable watch flag if address is not zero*/ + if (adw_info->watch_address[i] > 0) + cntl.bitfields.valid = 1; + else + cntl.bitfields.valid = 0; + + aw_reg_add_dword = + dbgdev->dev->kfd2kgd->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_CNTL); + + aw_reg_add_dword /= sizeof(uint32_t); + + packets_vec[3].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + packets_vec[3].reg_data[0] = cntl.u32All; + + status = dbgdev_diq_submit_ib( + dbgdev, + adw_info->process->pasid, + mem_obj->gpu_addr, + packet_buff_uint, + ib_size); + + if (status != 0) { + pr_err("amdkfd: Failed to submit IB to DIQ\n"); + break; + } + } + + kfd_gtt_sa_free(dbgdev->dev, mem_obj); + return status; +} + +static int dbgdev_wave_control_set_registers( + struct dbg_wave_control_info *wac_info, + union SQ_CMD_BITS *in_reg_sq_cmd, + union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) +{ + int status; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct HsaDbgWaveMsgAMDGen2 *pMsg; + + BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index); + + reg_sq_cmd.u32All = 0; + reg_gfx_index.u32All = 0; + pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; + + switch (wac_info->mode) { + /* Send command to single wave */ + case HSA_DBG_WAVEMODE_SINGLE: + /* + * Limit access to the process waves only, + * by setting vmid check + */ + reg_sq_cmd.bits.check_vmid = 1; + reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; + reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; + + reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; + reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; + reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; + + break; + + /* Send command to all waves with matching VMID */ + case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: + + 
reg_gfx_index.bits.sh_broadcast_writes = 1; + reg_gfx_index.bits.se_broadcast_writes = 1; + reg_gfx_index.bits.instance_broadcast_writes = 1; + + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; + + break; + + /* Send command to all CU waves with matching VMID */ + case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: + + reg_sq_cmd.bits.check_vmid = 1; + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; + + reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; + reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; + reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; + + break; + + default: + return -EINVAL; + } + + switch (wac_info->operand) { + case HSA_DBG_WAVEOP_HALT: + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; + break; + + case HSA_DBG_WAVEOP_RESUME: + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; + break; + + case HSA_DBG_WAVEOP_KILL: + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; + break; + + case HSA_DBG_WAVEOP_DEBUG: + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; + break; + + case HSA_DBG_WAVEOP_TRAP: + if (wac_info->trapId < MAX_TRAPID) { + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; + reg_sq_cmd.bits.trap_id = wac_info->trapId; + } else { + status = -EINVAL; + } + break; + + default: + status = -EINVAL; + break; + } + + if (status == 0) { + *in_reg_sq_cmd = reg_sq_cmd; + *in_reg_gfx_index = reg_gfx_index; + } + + return status; +} + +static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, + struct dbg_wave_control_info *wac_info) +{ + + int status; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct kfd_mem_obj *mem_obj; + uint32_t *packet_buff_uint; + struct pm4__set_config_reg *packets_vec; + size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; + + BUG_ON(!dbgdev || !wac_info); + + reg_sq_cmd.u32All = 0; + + status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd, + &reg_gfx_index); + if (status) { + pr_err("amdkfd: Failed to set wave control registers\n"); + return status; + } + + /* we do not control the VMID in 
DIQ,so reset it to a known value */ + reg_sq_cmd.bits.vm_id = 0; + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + pr_debug("\t\t mode is: %u\n", wac_info->mode); + pr_debug("\t\t operand is: %u\n", wac_info->operand); + pr_debug("\t\t trap id is: %u\n", wac_info->trapId); + pr_debug("\t\t msg value is: %u\n", + wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + pr_debug("\t\t vmid is: N/A\n"); + + pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); + pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); + pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); + pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); + pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); + pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); + pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); + + pr_debug("\t\t ibw is : %u\n", + reg_gfx_index.bitfields.instance_broadcast_writes); + pr_debug("\t\t ii is : %u\n", + reg_gfx_index.bitfields.instance_index); + pr_debug("\t\t sebw is : %u\n", + reg_gfx_index.bitfields.se_broadcast_writes); + pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); + pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); + pr_debug("\t\t sbw is : %u\n", + reg_gfx_index.bitfields.sh_broadcast_writes); + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); + + if (status != 0) { + pr_err("amdkfd: Failed to allocate GART memory\n"); + return status; + } + + packet_buff_uint = mem_obj->cpu_ptr; + + memset(packet_buff_uint, 0, ib_size); + + packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; + packets_vec[0].header.count = 1; + packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; + packets_vec[0].header.type = PM4_TYPE_3; + packets_vec[0].bitfields2.reg_offset = + GRBM_GFX_INDEX / (sizeof(uint32_t)) - + USERCONFIG_REG_BASE; + + 
packets_vec[0].bitfields2.insert_vmid = 0; + packets_vec[0].reg_data[0] = reg_gfx_index.u32All; + + packets_vec[1].header.count = 1; + packets_vec[1].header.opcode = IT_SET_CONFIG_REG; + packets_vec[1].header.type = PM4_TYPE_3; + packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - + CONFIG_REG_BASE; + + packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; + packets_vec[1].bitfields2.insert_vmid = 1; + packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; + + /* Restore the GRBM_GFX_INDEX register */ + + reg_gfx_index.u32All = 0; + reg_gfx_index.bits.sh_broadcast_writes = 1; + reg_gfx_index.bits.instance_broadcast_writes = 1; + reg_gfx_index.bits.se_broadcast_writes = 1; + + + packets_vec[2].ordinal1 = packets_vec[0].ordinal1; + packets_vec[2].bitfields2.reg_offset = + GRBM_GFX_INDEX / (sizeof(uint32_t)) - + USERCONFIG_REG_BASE; + + packets_vec[2].bitfields2.insert_vmid = 0; + packets_vec[2].reg_data[0] = reg_gfx_index.u32All; + + status = dbgdev_diq_submit_ib( + dbgdev, + wac_info->process->pasid, + mem_obj->gpu_addr, + packet_buff_uint, + ib_size); + + if (status != 0) + pr_err("amdkfd: Failed to submit IB to DIQ\n"); + + kfd_gtt_sa_free(dbgdev->dev, mem_obj); + + return status; +} + +static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, + struct dbg_wave_control_info *wac_info) +{ + int status; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct kfd_process_device *pdd; + + BUG_ON(!dbgdev || !dbgdev->dev || !wac_info); + + reg_sq_cmd.u32All = 0; + + /* taking the VMID for that process on the safe way using PDD */ + pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); + + if (!pdd) { + pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); + return -EFAULT; + } + status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd, + &reg_gfx_index); + if (status) { + pr_err("amdkfd: Failed to set wave control registers\n"); + return status; + } + + /* for non DIQ we need to patch the VMID: */ 
+ + reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + pr_debug("\t\t mode is: %u\n", wac_info->mode); + pr_debug("\t\t operand is: %u\n", wac_info->operand); + pr_debug("\t\t trap id is: %u\n", wac_info->trapId); + pr_debug("\t\t msg value is: %u\n", + wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); + + pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); + pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); + pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); + pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); + pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); + pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); + pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); + + pr_debug("\t\t ibw is : %u\n", + reg_gfx_index.bitfields.instance_broadcast_writes); + pr_debug("\t\t ii is : %u\n", + reg_gfx_index.bitfields.instance_index); + pr_debug("\t\t sebw is : %u\n", + reg_gfx_index.bitfields.se_broadcast_writes); + pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); + pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); + pr_debug("\t\t sbw is : %u\n", + reg_gfx_index.bitfields.sh_broadcast_writes); + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, + reg_gfx_index.u32All, + reg_sq_cmd.u32All); +} + +int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) +{ + int status = 0; + unsigned int vmid; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct kfd_process_device *pdd; + struct dbg_wave_control_info wac_info; + int temp; + int first_vmid_to_scan = 8; + int last_vmid_to_scan = 15; + + first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1; + temp = 
dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan; + last_vmid_to_scan = first_vmid_to_scan + ffz(temp); + + reg_sq_cmd.u32All = 0; + status = 0; + + wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; + wac_info.operand = HSA_DBG_WAVEOP_KILL; + + pr_debug("Killing all process wavefronts\n"); + + /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. + * ATC_VMID15_PASID_MAPPING + * to check which VMID the current process is mapped to. */ + + for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid + (dev->kgd, vmid)) { + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid + (dev->kgd, vmid) == p->pasid) { + pr_debug("Killing wave fronts of vmid %d and pasid %d\n", + vmid, p->pasid); + break; + } + } + } + + if (vmid > last_vmid_to_scan) { + pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid); + return -EFAULT; + } + + /* taking the VMID for that process on the safe way using PDD */ + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) + return -EFAULT; + + status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd, + &reg_gfx_index); + if (status != 0) + return -EINVAL; + + /* for non DIQ we need to patch the VMID: */ + reg_sq_cmd.bits.vm_id = vmid; + + dev->kfd2kgd->wave_control_execute(dev->kgd, + reg_gfx_index.u32All, + reg_sq_cmd.u32All); + + return 0; +} + +void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, + enum DBGDEV_TYPE type) +{ + BUG_ON(!pdbgdev || !pdev); + + pdbgdev->dev = pdev; + pdbgdev->kq = NULL; + pdbgdev->type = type; + pdbgdev->pqm = NULL; + + switch (type) { + case DBGDEV_TYPE_NODIQ: + pdbgdev->dbgdev_register = dbgdev_register_nodiq; + pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; + pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; + pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; + break; + case DBGDEV_TYPE_DIQ: + default: + pdbgdev->dbgdev_register = dbgdev_register_diq; + 
pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; + pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; + pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; + break; + } + +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h new file mode 100644 index 000000000000..4b0dd5aa5306 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h @@ -0,0 +1,193 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef KFD_DBGDEV_H_ +#define KFD_DBGDEV_H_ + +enum { + SQ_CMD_VMID_OFFSET = 28, + ADDRESS_WATCH_CNTL_OFFSET = 24 +}; + +enum { + PRIV_QUEUE_SYNC_TIME_MS = 200 +}; + +/* CONTEXT reg space definition */ +enum { + CONTEXT_REG_BASE = 0xA000, + CONTEXT_REG_END = 0xA400, + CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE +}; + +/* USER CONFIG reg space definition */ +enum { + USERCONFIG_REG_BASE = 0xC000, + USERCONFIG_REG_END = 0x10000, + USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE +}; + +/* CONFIG reg space definition */ +enum { + CONFIG_REG_BASE = 0x2000, /* in dwords */ + CONFIG_REG_END = 0x2B00, + CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE +}; + +/* SH reg space definition */ +enum { + SH_REG_BASE = 0x2C00, + SH_REG_END = 0x3000, + SH_REG_SIZE = SH_REG_END - SH_REG_BASE +}; + +enum SQ_IND_CMD_CMD { + SQ_IND_CMD_CMD_NULL = 0x00000000, + SQ_IND_CMD_CMD_HALT = 0x00000001, + SQ_IND_CMD_CMD_RESUME = 0x00000002, + SQ_IND_CMD_CMD_KILL = 0x00000003, + SQ_IND_CMD_CMD_DEBUG = 0x00000004, + SQ_IND_CMD_CMD_TRAP = 0x00000005, +}; + +enum SQ_IND_CMD_MODE { + SQ_IND_CMD_MODE_SINGLE = 0x00000000, + SQ_IND_CMD_MODE_BROADCAST = 0x00000001, + SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002, + SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003, + SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004, +}; + +union SQ_IND_INDEX_BITS { + struct { + uint32_t wave_id:4; + uint32_t simd_id:2; + uint32_t thread_id:6; + uint32_t:1; + uint32_t force_read:1; + uint32_t read_timeout:1; + uint32_t unindexed:1; + uint32_t index:16; + + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union SQ_IND_CMD_BITS { + struct { + uint32_t data:32; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union SQ_CMD_BITS { + struct { + uint32_t cmd:3; + uint32_t:1; + uint32_t mode:3; + uint32_t check_vmid:1; + uint32_t trap_id:3; + uint32_t:5; + uint32_t wave_id:4; + uint32_t simd_id:2; + uint32_t:2; + uint32_t queue_id:3; + 
uint32_t:1; + uint32_t vm_id:4; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union SQ_IND_DATA_BITS { + struct { + uint32_t data:32; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union GRBM_GFX_INDEX_BITS { + struct { + uint32_t instance_index:8; + uint32_t sh_index:8; + uint32_t se_index:8; + uint32_t:5; + uint32_t sh_broadcast_writes:1; + uint32_t instance_broadcast_writes:1; + uint32_t se_broadcast_writes:1; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union TCP_WATCH_ADDR_H_BITS { + struct { + uint32_t addr:16; + uint32_t:16; + + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union TCP_WATCH_ADDR_L_BITS { + struct { + uint32_t:6; + uint32_t addr:26; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +enum { + QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */ + QUEUESTATE__ACTIVE_COMPLETION_PENDING, + QUEUESTATE__ACTIVE +}; + +union ULARGE_INTEGER { + struct { + uint32_t low_part; + uint32_t high_part; + } u; + unsigned long long quad_part; +}; + + +#define KFD_CIK_VMID_START_OFFSET (8) +#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8)) + + +void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, + enum DBGDEV_TYPE type); + +#endif /* KFD_DBGDEV_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c new file mode 100644 index 000000000000..56d676396342 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -0,0 +1,168 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/device.h> + +#include "kfd_priv.h" +#include "cik_regs.h" +#include "kfd_pm4_headers.h" +#include "kfd_pm4_headers_diq.h" +#include "kfd_dbgmgr.h" +#include "kfd_dbgdev.h" + +static DEFINE_MUTEX(kfd_dbgmgr_mutex); + +struct mutex *kfd_get_dbgmgr_mutex(void) +{ + return &kfd_dbgmgr_mutex; +} + + +static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) +{ + BUG_ON(!pmgr); + + kfree(pmgr->dbgdev); + + pmgr->dbgdev = NULL; + pmgr->pasid = 0; + pmgr->dev = NULL; +} + +void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) +{ + if (pmgr != NULL) { + kfd_dbgmgr_uninitialize(pmgr); + kfree(pmgr); + } +} + +bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) +{ + enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; + struct kfd_dbgmgr *new_buff; + + BUG_ON(pdev == NULL); + BUG_ON(!pdev->init_complete); + + new_buff = kfd_alloc_struct(new_buff); + if (!new_buff) { + pr_err("amdkfd: Failed to allocate dbgmgr instance\n"); + return false; + } + + new_buff->pasid = 0; + new_buff->dev = pdev; + new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev); + if (!new_buff->dbgdev) { + pr_err("amdkfd: Failed to allocate dbgdev instance\n"); + kfree(new_buff); + return false; + } + + /* get actual type of DBGDevice cpsch or not */ + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) + type = DBGDEV_TYPE_NODIQ; + + kfd_dbgdev_init(new_buff->dbgdev, pdev, type); + *ppmgr = new_buff; + + return true; +} + +long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) +{ + BUG_ON(!p || !pmgr || !pmgr->dbgdev); + + if (pmgr->pasid != 0) { + pr_debug("H/W debugger is already active using pasid %d\n", + pmgr->pasid); + return -EBUSY; + } + + /* remember pasid */ + pmgr->pasid = p->pasid; + + /* provide the pqm for diq generation */ + pmgr->dbgdev->pqm = &p->pqm; + + /* activate the actual registering */ + 
pmgr->dbgdev->dbgdev_register(pmgr->dbgdev); + + return 0; +} + +long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) +{ + BUG_ON(!p || !pmgr || !pmgr->dbgdev); + + /* Is the requests coming from the already registered process? */ + if (pmgr->pasid != p->pasid) { + pr_debug("H/W debugger is not registered by calling pasid %d\n", + p->pasid); + return -EINVAL; + } + + pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev); + + pmgr->pasid = 0; + + return 0; +} + +long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, + struct dbg_wave_control_info *wac_info) +{ + BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info); + + /* Is the requests coming from the already registered process? */ + if (pmgr->pasid != wac_info->process->pasid) { + pr_debug("H/W debugger support was not registered for requester pasid %d\n", + wac_info->process->pasid); + return -EINVAL; + } + + return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info); +} + +long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, + struct dbg_address_watch_info *adw_info) +{ + BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info); + + + /* Is the requests coming from the already registered process? */ + if (pmgr->pasid != adw_info->process->pasid) { + pr_debug("H/W debugger support was not registered for requester pasid %d\n", + adw_info->process->pasid); + return -EINVAL; + } + + return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, + adw_info); +} + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h new file mode 100644 index 000000000000..257a745ad0b5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h @@ -0,0 +1,294 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef KFD_DBGMGR_H_ +#define KFD_DBGMGR_H_ + +#include "kfd_priv.h" + +/* must align with hsakmttypes definition */ +#pragma pack(push, 4) + +enum HSA_DBG_WAVEOP { + HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ + HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ + HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ + HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter + debug mode */ + HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take + a trap */ + HSA_DBG_NUM_WAVEOP = 5, + HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF +}; + +enum HSA_DBG_WAVEMODE { + /* send command to a single wave */ + HSA_DBG_WAVEMODE_SINGLE = 0, + /* + * Broadcast to all wavefronts of all processes is not + * supported for HSA user mode + */ + + /* send to waves within current process */ + HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, + /* send to waves within current process on CU */ + HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, + HSA_DBG_NUM_WAVEMODE = 3, + HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF +}; + +enum HSA_DBG_WAVEMSG_TYPE { + HSA_DBG_WAVEMSG_AUTO = 0, + HSA_DBG_WAVEMSG_USER = 1, + HSA_DBG_WAVEMSG_ERROR = 2, + HSA_DBG_NUM_WAVEMSG, + HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF +}; + +enum HSA_DBG_WATCH_MODE { + HSA_DBG_WATCH_READ = 0, /* Read operations only */ + HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */ + HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */ + HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */ + HSA_DBG_WATCH_NUM, + HSA_DBG_WATCH_SIZE = 0xFFFFFFFF +}; + +/* This structure is hardware specific and may change in the future */ +struct HsaDbgWaveMsgAMDGen2 { + union { + struct ui32 { + uint32_t UserData:8; /* user data */ + uint32_t ShaderArray:1; /* Shader array */ + uint32_t Priv:1; /* Privileged */ + uint32_t Reserved0:4; /* This field is reserved, + should be 0 */ + uint32_t WaveId:4; /* wave id */ + uint32_t SIMD:2; /* SIMD id */ + uint32_t HSACU:4; /* Compute unit */ + uint32_t ShaderEngine:2;/* Shader engine */ + uint32_t MessageType:2; /* see 
HSA_DBG_WAVEMSG_TYPE */ + uint32_t Reserved1:4; /* This field is reserved, + should be 0 */ + } ui32; + uint32_t Value; + }; + uint32_t Reserved2; +}; + +union HsaDbgWaveMessageAMD { + struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2; + /* for future HsaDbgWaveMsgAMDGen3; */ +}; + +struct HsaDbgWaveMessage { + void *MemoryVA; /* ptr to associated host-accessible data */ + union HsaDbgWaveMessageAMD DbgWaveMsg; +}; + +/* + * TODO: This definitions to be MOVED to kfd_event, once it is implemented. + * + * HSA sync primitive, Event and HW Exception notification API definitions. + * The API functions allow the runtime to define a so-called sync-primitive, + * a SW object combining a user-mode provided "syncvar" and a scheduler event + * that can be signaled through a defined GPU interrupt. A syncvar is + * a process virtual memory location of a certain size that can be accessed + * by CPU and GPU shader code within the process to set and query the content + * within that memory. The definition of the content is determined by the HSA + * runtime and potentially GPU shader code interfacing with the HSA runtime. + * The syncvar values may be commonly written through an PM4 WRITE_DATA packet + * in the user mode instruction stream. The OS scheduler event is typically + * associated and signaled by an interrupt issued by the GPU, but other HSA + * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced + * by the KFD by this mechanism, too. 
*/ + +/* these are the new definitions for events */ +enum HSA_EVENTTYPE { + HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */ + HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ + HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change + (start/stop) */ + HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ + HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ + HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ + HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */ + HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state + (EOP pm4) */ + /* ... */ + HSA_EVENTTYPE_MAXID, + HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF +}; + +/* Sub-definitions for various event types: Syncvar */ +struct HsaSyncVar { + union SyncVar { + void *UserData; /* pointer to user mode data */ + uint64_t UserDataPtrValue; /* 64bit compatibility of value */ + } SyncVar; + uint64_t SyncVarSize; +}; + +/* Sub-definitions for various event types: NodeChange */ + +enum HSA_EVENTTYPE_NODECHANGE_FLAGS { + HSA_EVENTTYPE_NODECHANGE_ADD = 0, + HSA_EVENTTYPE_NODECHANGE_REMOVE = 1, + HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF +}; + +struct HsaNodeChange { + /* HSA node added/removed on the platform */ + enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; +}; + +/* Sub-definitions for various event types: DeviceStateChange */ +enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS { + /* device started (and available) */ + HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, + /* device stopped (i.e. 
unavailable) */ + HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, + HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF +}; + +enum HSA_DEVICE { + HSA_DEVICE_CPU = 0, + HSA_DEVICE_GPU = 1, + MAX_HSA_DEVICE = 2 +}; + +struct HsaDeviceStateChange { + uint32_t NodeId; /* F-NUMA node that contains the device */ + enum HSA_DEVICE Device; /* device type: GPU or CPU */ + enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */ +}; + +struct HsaEventData { + enum HSA_EVENTTYPE EventType; /* event type */ + union EventData { + /* + * return data associated with HSA_EVENTTYPE_SIGNAL + * and other events + */ + struct HsaSyncVar SyncVar; + + /* data associated with HSA_EVENTTYPE_NODE_CHANGE */ + struct HsaNodeChange NodeChangeState; + + /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */ + struct HsaDeviceStateChange DeviceState; + } EventData; + + /* the following data entries are internal to the KFD & thunk itself */ + + /* internal thunk store for Event data (OsEventHandle) */ + uint64_t HWData1; + /* internal thunk store for Event data (HWAddress) */ + uint64_t HWData2; + /* internal thunk store for Event data (HWData) */ + uint32_t HWData3; +}; + +struct HsaEventDescriptor { + /* event type to allocate */ + enum HSA_EVENTTYPE EventType; + /* H-NUMA node containing GPU device that is event source */ + uint32_t NodeId; + /* pointer to user mode syncvar data, syncvar->UserDataPtrValue + * may be NULL + */ + struct HsaSyncVar SyncVar; +}; + +struct HsaEvent { + uint32_t EventId; + struct HsaEventData EventData; +}; + +#pragma pack(pop) + +enum DBGDEV_TYPE { + DBGDEV_TYPE_ILLEGAL = 0, + DBGDEV_TYPE_NODIQ = 1, + DBGDEV_TYPE_DIQ = 2, + DBGDEV_TYPE_TEST = 3 +}; + +struct dbg_address_watch_info { + struct kfd_process *process; + enum HSA_DBG_WATCH_MODE *watch_mode; + uint64_t *watch_address; + uint64_t *watch_mask; + struct HsaEvent *watch_event; + uint32_t num_watch_points; +}; + +struct dbg_wave_control_info { + struct kfd_process *process; + uint32_t trapId; + 
enum HSA_DBG_WAVEOP operand; + enum HSA_DBG_WAVEMODE mode; + struct HsaDbgWaveMessage dbgWave_msg; +}; + +struct kfd_dbgdev { + + /* The device that owns this data. */ + struct kfd_dev *dev; + + /* kernel queue for DIQ */ + struct kernel_queue *kq; + + /* a pointer to the pqm of the calling process */ + struct process_queue_manager *pqm; + + /* type of debug device ( DIQ, non DIQ, etc. ) */ + enum DBGDEV_TYPE type; + + /* virtualized function pointers to device dbg */ + int (*dbgdev_register)(struct kfd_dbgdev *dbgdev); + int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev); + int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, + struct dbg_address_watch_info *adw_info); + int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, + struct dbg_wave_control_info *wac_info); + +}; + +struct kfd_dbgmgr { + unsigned int pasid; + struct kfd_dev *dev; + struct kfd_dbgdev *dbgdev; +}; + +/* prototypes for debug manager functions */ +struct mutex *kfd_get_dbgmgr_mutex(void); +void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr); +bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev); +long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p); +long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p); +long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, + struct dbg_wave_control_info *wac_info); +long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, + struct dbg_address_watch_info *adw_info); +#endif /* KFD_DBGMGR_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 52cab0f53ebc..1d1e2e952a79 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -33,8 +33,11 @@ static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, .max_pasid_bits = 16, + /* max num of queues for KV.TODO should be a dynamic value */ + .max_no_of_hqd = 24, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = 
&event_interrupt_class_cik, + .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED }; @@ -294,6 +297,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto dqm_start_error; } + kfd->dbgmgr = NULL; + kfd->init_complete = true; dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor, kfd->pdev->device); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 4e215bd4d41f..547b0a589693 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); -static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock); +static int destroy_queues_cpsch(struct device_queue_manager *dqm, + bool preempt_static_queues, bool lock); static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, @@ -775,7 +776,7 @@ static int stop_cpsch(struct device_queue_manager *dqm) BUG_ON(!dqm); - destroy_queues_cpsch(dqm, true); + destroy_queues_cpsch(dqm, true, true); list_for_each_entry(node, &dqm->queues, list) { pdd = qpd_to_pdd(node->qpd); @@ -829,7 +830,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, pr_debug("kfd: In %s\n", __func__); mutex_lock(&dqm->lock); - destroy_queues_cpsch(dqm, false); + /* here we actually preempt the DIQ */ + destroy_queues_cpsch(dqm, true, false); list_del(&kq->list); dqm->queue_count--; qpd->is_debug = false; @@ -913,7 +915,7 @@ out: return retval; } -static int amdkfd_fence_wait_timeout(unsigned int *fence_addr, +int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, unsigned long timeout) { @@ -935,13 +937,16 @@ static int destroy_sdma_queues(struct device_queue_manager *dqm, unsigned int sdma_engine) { return 
pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false, sdma_engine); } -static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) +static int destroy_queues_cpsch(struct device_queue_manager *dqm, + bool preempt_static_queues, bool lock) { int retval; + enum kfd_preempt_type_filter preempt_type; + struct kfd_process *p; BUG_ON(!dqm); @@ -960,8 +965,12 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) destroy_sdma_queues(dqm, 1); } + preempt_type = preempt_static_queues ? + KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES : + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES; + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0); + preempt_type, 0, false, 0); if (retval != 0) goto out; @@ -969,8 +978,13 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr, KFD_FENCE_COMPLETED); /* should be timed out */ - amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, + retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); + if (retval != 0) { + p = kfd_get_process(current); + p->reset_wavefronts = true; + goto out; + } pm_release_ib(&dqm->packets); dqm->active_runlist = false; @@ -989,7 +1003,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) if (lock) mutex_lock(&dqm->lock); - retval = destroy_queues_cpsch(dqm, false); + retval = destroy_queues_cpsch(dqm, false, false); if (retval != 0) { pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption"); goto out; @@ -1024,13 +1038,27 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, { int retval; struct mqd_manager *mqd; + bool preempt_all_queues; BUG_ON(!dqm || !qpd || !q); + preempt_all_queues = false; + 
retval = 0; /* remove queue from list to prevent rescheduling after preemption */ mutex_lock(&dqm->lock); + + if (qpd->is_debug) { + /* + * error, currently we do not allow to destroy a queue + * of a currently debugged process + */ + retval = -EBUSY; + goto failed_try_destroy_debugged_queue; + + } + mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { @@ -1062,6 +1090,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, return 0; failed: +failed_try_destroy_debugged_queue: + mutex_unlock(&dqm->lock); return retval; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 57278e2d72e0..ec4036a09f3e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -88,9 +88,11 @@ struct device_queue_manager_ops { struct queue *q, struct qcm_process_device *qpd, int *allocate_vmid); + int (*destroy_queue)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q); + int (*update_queue)(struct device_queue_manager *dqm, struct queue *q); @@ -100,8 +102,10 @@ struct device_queue_manager_ops { int (*register_process)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); + int (*unregister_process)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); + int (*initialize)(struct device_queue_manager *dqm); int (*start)(struct device_queue_manager *dqm); int (*stop)(struct device_queue_manager *dqm); @@ -109,9 +113,11 @@ struct device_queue_manager_ops { int (*create_kernel_queue)(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd); + void (*destroy_kernel_queue)(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd); + bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, enum cache_policy default_policy, 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index e2533d875f43..99b6d28a11c3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -163,7 +163,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, num_queues = 0; list_for_each_entry(cur, &qpd->queues_list, list) num_queues++; - packet->bitfields10.num_queues = num_queues; + packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues; packet->sh_mem_config = qpd->sh_mem_config; packet->sh_mem_bases = qpd->sh_mem_bases; @@ -177,9 +177,10 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, } static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, - struct queue *q) + struct queue *q, bool is_static) { struct pm4_map_queues *packet; + bool use_static = is_static; BUG_ON(!pm || !buffer || !q); @@ -209,6 +210,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, case KFD_QUEUE_TYPE_SDMA: packet->bitfields2.engine_sel = engine_sel__mes_map_queues__sdma0; + use_static = false; /* no static queues under SDMA */ break; default: BUG(); @@ -218,6 +220,9 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset = q->properties.doorbell_off; + packet->mes_map_queues_ordinals[0].bitfields3.is_static = + (use_static == true) ? 
1 : 0; + packet->mes_map_queues_ordinals[0].mqd_addr_lo = lower_32_bits(q->gart_mqd_addr); @@ -271,9 +276,11 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pm_release_ib(pm); return -ENOMEM; } + retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); if (retval != 0) return retval; + proccesses_mapped++; inc_wptr(&rl_wptr, sizeof(struct pm4_map_process), alloc_size_bytes); @@ -281,23 +288,36 @@ static int pm_create_runlist_ib(struct packet_manager *pm, list_for_each_entry(kq, &qpd->priv_queue_list, list) { if (kq->queue->properties.is_active != true) continue; + + pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n", + kq->queue->queue, qpd->is_debug); + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], - kq->queue); + kq->queue, qpd->is_debug); if (retval != 0) return retval; - inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), - alloc_size_bytes); + + inc_wptr(&rl_wptr, + sizeof(struct pm4_map_queues), + alloc_size_bytes); } list_for_each_entry(q, &qpd->queues_list, list) { if (q->properties.is_active != true) continue; - retval = pm_create_map_queue(pm, - &rl_buffer[rl_wptr], q); + + pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n", + q->queue, qpd->is_debug); + + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], + q, qpd->is_debug); + if (retval != 0) return retval; - inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), - alloc_size_bytes); + + inc_wptr(&rl_wptr, + sizeof(struct pm4_map_queues), + alloc_size_bytes); } } @@ -488,7 +508,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, packet = (struct pm4_unmap_queues *)buffer; memset(buffer, 0, sizeof(struct pm4_unmap_queues)); - + pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", + mode, reset, type); packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES, sizeof(struct pm4_unmap_queues)); switch (type) { @@ -529,6 +550,11 @@ int pm_send_unmap_queue(struct packet_manager 
*pm, enum kfd_queue_type type, packet->bitfields2.queue_sel = queue_sel__mes_unmap_queues__perform_request_on_all_active_queues; break; + case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only; + break; default: BUG(); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h index 071ad5724bd2..5b393f3e34a9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h @@ -237,7 +237,8 @@ struct pm4_map_queues { struct { union { struct { - uint32_t reserved5:2; + uint32_t is_static:1; + uint32_t reserved5:1; uint32_t doorbell_offset:21; uint32_t reserved6:3; uint32_t queue:6; @@ -328,7 +329,8 @@ enum unmap_queues_action_enum { enum unmap_queues_queue_sel_enum { queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, - queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2 + queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2, + queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3 }; enum unmap_queues_engine_sel_enum { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h new file mode 100644 index 000000000000..a0ff34878163 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h @@ -0,0 +1,290 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#ifndef KFD_PM4_HEADERS_DIQ_H_
+#define KFD_PM4_HEADERS_DIQ_H_
+
+/*--------------------_INDIRECT_BUFFER-------------------- */
+
+#ifndef _PM4__INDIRECT_BUFFER_DEFINED
+#define _PM4__INDIRECT_BUFFER_DEFINED
+enum _INDIRECT_BUFFER_cache_policy_enum {	/* no field of the struct below is typed with this enum */
+	cache_policy___indirect_buffer__lru = 0,
+	cache_policy___indirect_buffer__stream = 1,
+	cache_policy___indirect_buffer__bypass = 2
+};
+/* NOTE(review): presumably the opcode of the packet below - confirm where the header word is built. */
+enum {
+	IT_INDIRECT_BUFFER_PASID = 0x5C
+};
+/*
+ * Layout of the INDIRECT_BUFFER_PASID packet: five consecutive words, each
+ * an anonymous union of a named view and a raw "ordinalN" word.
+ *
+ *   ordinal1 - union PM4_MES_TYPE_3_HEADER
+ *   ordinal2 - ib_base_lo (30 bits) next to 2 reserved bits
+ *   ordinal3 - ib_base_hi (16 bits), remaining 16 bits reserved
+ *   ordinal4 - control, exposed only as a whole word
+ *   ordinal5 - pasid (10 bits), remaining 22 bits reserved
+ *
+ * NOTE(review): the bit-fields of each ordinal add up to 32; this relies on
+ * a 32-bit unsigned int, and the position of every field inside its word
+ * depends on the compiler's bit-field allocation order - confirm for the
+ * target ABI. ib_base_lo presumably carries the address with its two low
+ * bits dropped (4-byte alignment) - TODO confirm.
+ */
+struct pm4__indirect_buffer_pasid {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		unsigned int ordinal1;
+	};
+
+	union {
+		struct {
+			unsigned int reserved1:2;
+			unsigned int ib_base_lo:30;
+		} bitfields2;
+		unsigned int ordinal2;
+	};
+
+	union {
+		struct {
+			unsigned int ib_base_hi:16;
+			unsigned int reserved2:16;
+		} bitfields3;
+		unsigned int ordinal3;
+	};
+
+	union {
+		unsigned int control;
+		unsigned int ordinal4;
+	};
+
+	union {
+		struct {
+			unsigned int pasid:10;
+			unsigned int reserved4:22;
+		} bitfields5;
+		unsigned int ordinal5;
+	};
+
+};
+
+#endif
+
+/*--------------------_RELEASE_MEM-------------------- */
+
+#ifndef _PM4__RELEASE_MEM_DEFINED
+#define _PM4__RELEASE_MEM_DEFINED
+enum _RELEASE_MEM_event_index_enum {	/* type of the 4-bit event_index field of struct pm4__release_mem */
+	event_index___release_mem__end_of_pipe = 5,
+	event_index___release_mem__shader_done = 6
+};
+/* Type of the 2-bit cache_policy field of struct pm4__release_mem. */
+enum _RELEASE_MEM_cache_policy_enum {
+	cache_policy___release_mem__lru = 0,
+	cache_policy___release_mem__stream = 1,
+	cache_policy___release_mem__bypass = 2
+};
+/* Type of the 2-bit dst_sel field of struct pm4__release_mem. */
+enum _RELEASE_MEM_dst_sel_enum {
+	dst_sel___release_mem__memory_controller = 0,
+	dst_sel___release_mem__tc_l2 = 1,
+	dst_sel___release_mem__queue_write_pointer_register = 2,
+	dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+/* Type of the 3-bit int_sel field of struct pm4__release_mem; only values 0..3 are named. */
+enum _RELEASE_MEM_int_sel_enum {
+	int_sel___release_mem__none = 0,
+	int_sel___release_mem__send_interrupt_only = 1,
+	int_sel___release_mem__send_interrupt_after_write_confirm = 2,
+	int_sel___release_mem__send_data_after_write_confirm = 3
+};
+
+enum
_RELEASE_MEM_data_sel_enum {	/* type of the 3-bit data_sel field below; values 0..5 are named */
+	data_sel___release_mem__none = 0,
+	data_sel___release_mem__send_32_bit_low = 1,
+	data_sel___release_mem__send_64_bit_data = 2,
+	data_sel___release_mem__send_gpu_clock_counter = 3,
+	data_sel___release_mem__send_cp_perfcounter_hi_lo = 4,
+	data_sel___release_mem__store_gds_data_to_memory = 5
+};
+/*
+ * Layout of the RELEASE_MEM packet: seven consecutive words.
+ *
+ *   ordinal1   - union PM4_MES_TYPE_3_HEADER
+ *   ordinal2   - event_type, event_index, the tc and tcl1 action-enable
+ *                flags, atc and cache_policy (bitfields2)
+ *   ordinal3   - dst_sel, int_sel and data_sel selectors (bitfields3)
+ *   ordinal4   - low address word with two alternative views:
+ *                address_lo_32b (30 bits, 2 reserved) and
+ *                address_lo_64b (29 bits, 3 reserved)
+ *   address_hi, data_lo, data_hi - plain unsigned int words
+ *
+ * The bit-fields of ordinal2 and of ordinal3 each add up to 32 bits.
+ *
+ * NOTE(review): the _32b and _64b suffixes presumably correspond to 4-byte
+ * and 8-byte aligned destinations for 32-bit and 64-bit data - confirm.
+ * Field positions inside each word depend on the compiler's bit-field
+ * allocation order; the enum-typed bit-fields are also
+ * implementation-defined in ISO C - confirm for the target compiler.
+ */
+struct pm4__release_mem {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/*header */
+		unsigned int ordinal1;
+	};
+
+	union {
+		struct {
+			unsigned int event_type:6;
+			unsigned int reserved1:2;
+			enum _RELEASE_MEM_event_index_enum event_index:4;
+			unsigned int tcl1_vol_action_ena:1;
+			unsigned int tc_vol_action_ena:1;
+			unsigned int reserved2:1;
+			unsigned int tc_wb_action_ena:1;
+			unsigned int tcl1_action_ena:1;
+			unsigned int tc_action_ena:1;
+			unsigned int reserved3:6;
+			unsigned int atc:1;
+			enum _RELEASE_MEM_cache_policy_enum cache_policy:2;
+			unsigned int reserved4:5;
+		} bitfields2;
+		unsigned int ordinal2;
+	};
+
+	union {
+		struct {
+			unsigned int reserved5:16;
+			enum _RELEASE_MEM_dst_sel_enum dst_sel:2;
+			unsigned int reserved6:6;
+			enum _RELEASE_MEM_int_sel_enum int_sel:3;
+			unsigned int reserved7:2;
+			enum _RELEASE_MEM_data_sel_enum data_sel:3;
+		} bitfields3;
+		unsigned int ordinal3;
+	};
+
+	union {
+		struct {
+			unsigned int reserved8:2;
+			unsigned int address_lo_32b:30;
+		} bitfields4;
+		struct {
+			unsigned int reserved9:3;
+			unsigned int address_lo_64b:29;
+		} bitfields5;
+		unsigned int ordinal4;
+	};
+
+	unsigned int address_hi;
+
+	unsigned int data_lo;
+
+	unsigned int data_hi;
+
+};
+#endif
+
+
+/*--------------------_SET_CONFIG_REG-------------------- */
+
+#ifndef _PM4__SET_CONFIG_REG_DEFINED
+#define _PM4__SET_CONFIG_REG_DEFINED
+/*
+ * Layout of the SET_CONFIG_REG packet: header word, one control word
+ * (reg_offset, vmid_shift, insert_vmid; 32 bits in total) and then the
+ * register data.
+ */
+struct pm4__set_config_reg {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/*header */
+		unsigned int ordinal1;
+	};
+
+	union {
+		struct {
+			unsigned int reg_offset:16;
+			unsigned int reserved1:7;
+			unsigned int vmid_shift:5;
+			unsigned int insert_vmid:1;
+			unsigned int reserved2:3;
+		} bitfields2;
+		unsigned int ordinal2;
+	};
+
+	/*
+	 * 1..N register values follow the control word. The array is declared
+	 * with one element, so sizeof() of this struct accounts for exactly
+	 * one data word.
+	 */
+	unsigned int reg_data[1];
+
+};
+#endif
+
+/*--------------------_WAIT_REG_MEM-------------------- */
+
+#ifndef _PM4__WAIT_REG_MEM_DEFINED
+#define _PM4__WAIT_REG_MEM_DEFINED
+enum _WAIT_REG_MEM_function_enum {	/* type of the 3-bit function field; all eight codes are named */
+	function___wait_reg_mem__always_pass = 0,
+	function___wait_reg_mem__less_than_ref_value = 1,
+	function___wait_reg_mem__less_than_equal_to_the_ref_value = 2,
+	function___wait_reg_mem__equal_to_the_reference_value = 3,
+	function___wait_reg_mem__not_equal_reference_value = 4,
+	function___wait_reg_mem__greater_than_or_equal_reference_value = 5,
+	function___wait_reg_mem__greater_than_reference_value = 6,
+	function___wait_reg_mem__reserved = 7
+};
+/* Type of the 2-bit mem_space field; only values 0 and 1 are named. */
+enum _WAIT_REG_MEM_mem_space_enum {
+	mem_space___wait_reg_mem__register_space = 0,
+	mem_space___wait_reg_mem__memory_space = 1
+};
+/* Type of the 2-bit operation field; only values 0 and 1 are named. */
+enum _WAIT_REG_MEM_operation_enum {
+	operation___wait_reg_mem__wait_reg_mem = 0,
+	operation___wait_reg_mem__wr_wait_wr_reg = 1
+};
+/*
+ * Layout of the WAIT_REG_MEM packet: seven consecutive words.
+ *
+ *   ordinal1  - union PM4_MES_TYPE_3_HEADER
+ *   ordinal2  - function, mem_space and operation selectors (32 bits total)
+ *   ordinal3  - three alternative views: memory_poll_addr_lo (30 bits),
+ *               register_poll_addr (16 bits), register_write_addr (16 bits)
+ *   ordinal4  - two alternative views: poll_address_hi (16 bits),
+ *               register_write_addr (16 bits)
+ *   reference, mask - plain unsigned int words
+ *   ordinal7  - poll_interval (16 bits)
+ *
+ * NOTE(review): which view of ordinal3 and ordinal4 applies presumably
+ * follows the mem_space and operation selectors, and the comparison named
+ * by "function" is presumably applied to the polled value masked with
+ * "mask" against "reference" - confirm against the packet specification.
+ */
+struct pm4__wait_reg_mem {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/*header */
+		unsigned int ordinal1;
+	};
+
+	union {
+		struct {
+			enum _WAIT_REG_MEM_function_enum function:3;
+			unsigned int reserved1:1;
+			enum _WAIT_REG_MEM_mem_space_enum mem_space:2;
+			enum _WAIT_REG_MEM_operation_enum operation:2;
+			unsigned int reserved2:24;
+		} bitfields2;
+		unsigned int ordinal2;
+	};
+
+	union {
+		struct {
+			unsigned int reserved3:2;
+			unsigned int memory_poll_addr_lo:30;
+		} bitfields3;
+		struct {
+			unsigned int register_poll_addr:16;
+			unsigned int reserved4:16;
+		} bitfields4;
+		struct {
+			unsigned int register_write_addr:16;
+			unsigned int reserved5:16;
+		} bitfields5;
+		unsigned int ordinal3;
+	};
+
+	union {
+		struct {
+			unsigned int poll_address_hi:16;
+			unsigned int reserved6:16;
+		} bitfields6;
+		struct {
+			unsigned int register_write_addr:16;
+			unsigned int reserved7:16;
+		} bitfields7;
+		unsigned int ordinal4;
+	};
+
+	unsigned int reference;
+
+	unsigned
int mask; + + union { + struct { + unsigned int poll_interval:16; + unsigned int reserved8:16; + } bitfields8; + unsigned int ordinal7; + }; + +}; +#endif + + +#endif /* KFD_PM4_HEADERS_DIQ_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index b6f838f56589..cb79046e5c80 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -128,6 +128,7 @@ struct kfd_device_info { unsigned int asic_family; const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; + unsigned int max_no_of_hqd; size_t ih_ring_entry_size; uint8_t num_of_watch_points; uint16_t mqd_size_aligned; @@ -167,8 +168,8 @@ struct kfd_dev { const struct kfd2kgd_calls *kfd2kgd; struct mutex doorbell_mutex; - unsigned long doorbell_available_index[DIV_ROUND_UP( - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; + DECLARE_BITMAP(doorbell_available_index, + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); void *gtt_mem; uint64_t gtt_start_gpu_addr; @@ -195,6 +196,9 @@ struct kfd_dev { * from the HW ring into a SW ring. */ bool interrupts_active; + + /* Debug manager */ + struct kfd_dbgmgr *dbgmgr; }; /* KGD2KFD callbacks */ @@ -231,6 +235,7 @@ struct device *kfd_chardev(void); enum kfd_preempt_type_filter { KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE, KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, KFD_PREEMPT_TYPE_FILTER_BY_PASID }; @@ -503,8 +508,6 @@ struct kfd_process { /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. 
*/ struct kfd_queue **queues; - unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; - /*Is the user space process 32 bit?*/ bool is_32bit_user_mode; @@ -516,6 +519,11 @@ struct kfd_process { event_pages */ u32 next_nonsignal_event_id; size_t signal_event_count; + /* + * This flag tells if we should reset all wavefronts on + * process termination + */ + bool reset_wavefronts; }; /** @@ -650,6 +658,12 @@ int pqm_create_queue(struct process_queue_manager *pqm, int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); +struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, + unsigned int qid); + +int amdkfd_fence_wait_timeout(unsigned int *fence_addr, + unsigned int fence_value, + unsigned long timeout); /* Packet Manager */ @@ -717,4 +731,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, uint64_t *event_page_offset, uint32_t *event_slot_index); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); +int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); + #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index dc910af2bb3c..56b904f5bdb1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -31,6 +31,7 @@ struct mm_struct; #include "kfd_priv.h" +#include "kfd_dbgmgr.h" /* * Initial size for the array of queues. 
@@ -172,6 +173,9 @@ static void kfd_process_wq_release(struct work_struct *work) pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", pdd->dev->id, p->pasid); + if (p->reset_wavefronts) + dbgdev_wave_reset_wavefronts(pdd->dev, p); + amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); list_del(&pdd->per_device_list); @@ -301,6 +305,8 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (kfd_init_apertures(process) != 0) goto err_init_apretures; + process->reset_wavefronts = false; + return process; err_init_apretures: @@ -399,7 +405,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) mutex_lock(&p->mutex); + if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) + kfd_dbgmgr_destroy(dev->dbgmgr); + pqm_uninit(&p->pqm); + if (p->reset_wavefronts) + dbgdev_wave_reset_wavefronts(dev, p); pdd = kfd_get_process_device_data(dev, p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 530b82c4e78b..7b69070f7ecc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -158,6 +158,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct queue *q; struct process_queue_node *pqn; struct kernel_queue *kq; + int num_queues = 0; + struct queue *cur; BUG_ON(!pqm || !dev || !properties || !qid); @@ -172,6 +174,20 @@ int pqm_create_queue(struct process_queue_manager *pqm, return -1; } + /* + * for debug process, verify that it is within the static queues limit + * currently limit is set to half of the total avail HQD slots + * If we are just about to create DIQ, the is_debug flag is not set yet + * Hence we also check the type as well + */ + if ((pdd->qpd.is_debug) || + (type == KFD_QUEUE_TYPE_DIQ)) { + list_for_each_entry(cur, &pdd->qpd.queues_list, list) + num_queues++; + if (num_queues >= dev->device_info->max_no_of_hqd/2) + return 
(-ENOSPC); + } + retval = find_available_queue_slot(pqm, qid); if (retval != 0) return retval; @@ -341,7 +357,7 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, return 0; } -static __attribute__((unused)) struct kernel_queue *pqm_get_kernel_queue( +struct kernel_queue *pqm_get_kernel_queue( struct process_queue_manager *pqm, unsigned int qid) { |