summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c308
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c886
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h193
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c168
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h294
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c48
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c46
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h290
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h24
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c18
15 files changed, 2279 insertions, 27 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 652d25478fd5..28551153ec6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -12,6 +12,7 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_kernel_queue_vi.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \
kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
- kfd_interrupt.o kfd_events.o cik_event_interrupt.o
+ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
+ kfd_dbgdev.o kfd_dbgmgr.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b2c6109bd7af..96c904b3acb7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -35,6 +35,7 @@
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
+#include "kfd_dbgmgr.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -432,6 +433,301 @@ out:
return err;
}
+static int kfd_ioctl_dbg_register(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_dbg_register_args *args = data;
+ struct kfd_dev *dev;
+ struct kfd_dbgmgr *dbgmgr_ptr;
+ struct kfd_process_device *pdd;
+ bool create_ok;
+ long status = 0;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(kfd_get_dbgmgr_mutex());
+ mutex_lock(&p->mutex);
+
+ /*
+ * make sure that we have pdd, if this the first queue created for
+ * this process
+ */
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ mutex_unlock(&p->mutex);
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+ return PTR_ERR(pdd);
+ }
+
+ if (dev->dbgmgr == NULL) {
+ /* In case of a legal call, we have no dbgmgr yet */
+ create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
+ if (create_ok) {
+ status = kfd_dbgmgr_register(dbgmgr_ptr, p);
+ if (status != 0)
+ kfd_dbgmgr_destroy(dbgmgr_ptr);
+ else
+ dev->dbgmgr = dbgmgr_ptr;
+ }
+ } else {
+ pr_debug("debugger already registered\n");
+ status = -EINVAL;
+ }
+
+ mutex_unlock(&p->mutex);
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+
+ return status;
+}
+
+static int kfd_ioctl_dbg_unrgesiter(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_dbg_unregister_args *args = data;
+ struct kfd_dev *dev;
+ long status;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ pr_debug("kfd_ioctl_dbg_unrgesiter not supported on CZ\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(kfd_get_dbgmgr_mutex());
+
+ status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
+ if (status == 0) {
+ kfd_dbgmgr_destroy(dev->dbgmgr);
+ dev->dbgmgr = NULL;
+ }
+
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+
+ return status;
+}
+
+/*
+ * Parse and generate variable size data structure for address watch.
+ * Total size of the buffer and # watch points is limited in order
+ * to prevent kernel abuse. (no bearing to the much smaller HW limitation
+ * which is enforced by dbgdev module)
+ * please also note that the watch address itself are not "copied from user",
+ * since it be set into the HW in user mode values.
+ *
+ */
+static int kfd_ioctl_dbg_address_watch(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_dbg_address_watch_args *args = data;
+ struct kfd_dev *dev;
+ struct dbg_address_watch_info aw_info;
+ unsigned char *args_buff;
+ long status;
+ void __user *cmd_from_user;
+ uint64_t watch_mask_value = 0;
+ unsigned int args_idx = 0;
+
+ memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
+ return -EINVAL;
+ }
+
+ cmd_from_user = (void __user *) args->content_ptr;
+
+ /* Validate arguments */
+
+ if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
+ (args->buf_size_in_bytes <= sizeof(*args)) ||
+ (cmd_from_user == NULL))
+ return -EINVAL;
+
+ /* this is the actual buffer to work with */
+
+ args_buff = kmalloc(args->buf_size_in_bytes -
+ sizeof(*args), GFP_KERNEL);
+ if (args_buff == NULL)
+ return -ENOMEM;
+
+ status = copy_from_user(args_buff, cmd_from_user,
+ args->buf_size_in_bytes - sizeof(*args));
+
+ if (status != 0) {
+ pr_debug("Failed to copy address watch user data\n");
+ kfree(args_buff);
+ return -EINVAL;
+ }
+
+ aw_info.process = p;
+
+ aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
+ args_idx += sizeof(aw_info.num_watch_points);
+
+ aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
+ args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
+
+ /*
+ * set watch address base pointer to point on the array base
+ * within args_buff
+ */
+ aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
+
+ /* skip over the addresses buffer */
+ args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
+
+ if (args_idx >= args->buf_size_in_bytes) {
+ kfree(args_buff);
+ return -EINVAL;
+ }
+
+ watch_mask_value = (uint64_t) args_buff[args_idx];
+
+ if (watch_mask_value > 0) {
+ /*
+ * There is an array of masks.
+ * set watch mask base pointer to point on the array base
+ * within args_buff
+ */
+ aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
+
+ /* skip over the masks buffer */
+ args_idx += sizeof(aw_info.watch_mask) *
+ aw_info.num_watch_points;
+ } else {
+ /* just the NULL mask, set to NULL and skip over it */
+ aw_info.watch_mask = NULL;
+ args_idx += sizeof(aw_info.watch_mask);
+ }
+
+ if (args_idx > args->buf_size_in_bytes) {
+ kfree(args_buff);
+ return -EINVAL;
+ }
+
+ /* Currently HSA Event is not supported for DBG */
+ aw_info.watch_event = NULL;
+
+ mutex_lock(kfd_get_dbgmgr_mutex());
+
+ status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
+
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+
+ kfree(args_buff);
+
+ return status;
+}
+
+/* Parse and generate fixed size data structure for wave control */
+static int kfd_ioctl_dbg_wave_control(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_dbg_wave_control_args *args = data;
+ struct kfd_dev *dev;
+ struct dbg_wave_control_info wac_info;
+ unsigned char *args_buff;
+ uint32_t computed_buff_size;
+ long status;
+ void __user *cmd_from_user;
+ unsigned int args_idx = 0;
+
+ memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
+
+ /* we use compact form, independent of the packing attribute value */
+ computed_buff_size = sizeof(*args) +
+ sizeof(wac_info.mode) +
+ sizeof(wac_info.operand) +
+ sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
+ sizeof(wac_info.dbgWave_msg.MemoryVA) +
+ sizeof(wac_info.trapId);
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
+ return -EINVAL;
+ }
+
+ /* input size must match the computed "compact" size */
+ if (args->buf_size_in_bytes != computed_buff_size) {
+ pr_debug("size mismatch, computed : actual %u : %u\n",
+ args->buf_size_in_bytes, computed_buff_size);
+ return -EINVAL;
+ }
+
+ cmd_from_user = (void __user *) args->content_ptr;
+
+ if (cmd_from_user == NULL)
+ return -EINVAL;
+
+ /* this is the actual buffer to work with */
+
+ args_buff = kmalloc(args->buf_size_in_bytes - sizeof(*args),
+ GFP_KERNEL);
+
+ if (args_buff == NULL)
+ return -ENOMEM;
+
+ /* Now copy the entire buffer from user */
+ status = copy_from_user(args_buff, cmd_from_user,
+ args->buf_size_in_bytes - sizeof(*args));
+ if (status != 0) {
+ pr_debug("Failed to copy wave control user data\n");
+ kfree(args_buff);
+ return -EINVAL;
+ }
+
+ /* move ptr to the start of the "pay-load" area */
+ wac_info.process = p;
+
+ wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
+ args_idx += sizeof(wac_info.operand);
+
+ wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
+ args_idx += sizeof(wac_info.mode);
+
+ wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
+ args_idx += sizeof(wac_info.trapId);
+
+ wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
+ *((uint32_t *)(&args_buff[args_idx]));
+ wac_info.dbgWave_msg.MemoryVA = NULL;
+
+ mutex_lock(kfd_get_dbgmgr_mutex());
+
+ pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
+ wac_info.process, wac_info.operand,
+ wac_info.mode, wac_info.trapId,
+ wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+
+ status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
+
+ pr_debug("Returned status of dbg manager is %ld\n", status);
+
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+
+ kfree(args_buff);
+
+ return status;
+}
+
static int kfd_ioctl_get_clock_counters(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -612,6 +908,18 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
kfd_ioctl_wait_events, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
+ kfd_ioctl_dbg_register, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
+ kfd_ioctl_dbg_unrgesiter, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
+ kfd_ioctl_dbg_address_watch, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
+ kfd_ioctl_dbg_wave_control, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
new file mode 100644
index 000000000000..96153f28d73f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -0,0 +1,886 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_diq.h"
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_pm4_opcodes.h"
+#include "cik_regs.h"
+#include "kfd_dbgmgr.h"
+#include "kfd_dbgdev.h"
+#include "kfd_device_queue_manager.h"
+#include "../../radeon/cik_reg.h"
+
+static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
+{
+ BUG_ON(!dev || !dev->kfd2kgd);
+
+ dev->kfd2kgd->address_watch_disable(dev->kgd);
+}
+
+static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
+ unsigned int pasid, uint64_t vmid0_address,
+ uint32_t *packet_buff, size_t size_in_bytes)
+{
+ struct pm4__release_mem *rm_packet;
+ struct pm4__indirect_buffer_pasid *ib_packet;
+ struct kfd_mem_obj *mem_obj;
+ size_t pq_packets_size_in_bytes;
+ union ULARGE_INTEGER *largep;
+ union ULARGE_INTEGER addr;
+ struct kernel_queue *kq;
+ uint64_t *rm_state;
+ unsigned int *ib_packet_buff;
+ int status;
+
+ BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
+
+ kq = dbgdev->kq;
+
+ pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
+ sizeof(struct pm4__indirect_buffer_pasid);
+
+ /*
+ * We acquire a buffer from DIQ
+ * The receive packet buff will be sitting on the Indirect Buffer
+ * and in the PQ we put the IB packet + sync packet(s).
+ */
+ status = kq->ops.acquire_packet_buffer(kq,
+ pq_packets_size_in_bytes / sizeof(uint32_t),
+ &ib_packet_buff);
+ if (status != 0) {
+ pr_err("amdkfd: acquire_packet_buffer failed\n");
+ return status;
+ }
+
+ memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
+
+ ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
+
+ ib_packet->header.count = 3;
+ ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
+ ib_packet->header.type = PM4_TYPE_3;
+
+ largep = (union ULARGE_INTEGER *) &vmid0_address;
+
+ ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
+ ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
+
+ ib_packet->control = (1 << 23) | (1 << 31) |
+ ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
+
+ ib_packet->bitfields5.pasid = pasid;
+
+ /*
+ * for now we use release mem for GPU-CPU synchronization
+ * Consider WaitRegMem + WriteData as a better alternative
+ * we get a GART allocations ( gpu/cpu mapping),
+ * for the sync variable, and wait until:
+ * (a) Sync with HW
+ * (b) Sync var is written by CP to mem.
+ */
+ rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
+ (sizeof(struct pm4__indirect_buffer_pasid) /
+ sizeof(unsigned int)));
+
+ status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
+ &mem_obj);
+
+ if (status != 0) {
+ pr_err("amdkfd: Failed to allocate GART memory\n");
+ kq->ops.rollback_packet(kq);
+ return status;
+ }
+
+ rm_state = (uint64_t *) mem_obj->cpu_ptr;
+
+ *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
+
+ rm_packet->header.opcode = IT_RELEASE_MEM;
+ rm_packet->header.type = PM4_TYPE_3;
+ rm_packet->header.count = sizeof(struct pm4__release_mem) /
+ sizeof(unsigned int) - 2;
+
+ rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+ rm_packet->bitfields2.event_index =
+ event_index___release_mem__end_of_pipe;
+
+ rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+ rm_packet->bitfields2.atc = 0;
+ rm_packet->bitfields2.tc_wb_action_ena = 1;
+
+ addr.quad_part = mem_obj->gpu_addr;
+
+ rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
+ rm_packet->address_hi = addr.u.high_part;
+
+ rm_packet->bitfields3.data_sel =
+ data_sel___release_mem__send_64_bit_data;
+
+ rm_packet->bitfields3.int_sel =
+ int_sel___release_mem__send_data_after_write_confirm;
+
+ rm_packet->bitfields3.dst_sel =
+ dst_sel___release_mem__memory_controller;
+
+ rm_packet->data_lo = QUEUESTATE__ACTIVE;
+
+ kq->ops.submit_packet(kq);
+
+ /* Wait till CP writes sync code: */
+ status = amdkfd_fence_wait_timeout(
+ (unsigned int *) rm_state,
+ QUEUESTATE__ACTIVE, 1500);
+
+ kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+
+ return status;
+}
+
+static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
+{
+ BUG_ON(!dbgdev);
+
+ /*
+ * no action is needed in this case,
+ * just make sure diq will not be used
+ */
+
+ dbgdev->kq = NULL;
+
+ return 0;
+}
+
+static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
+{
+ struct queue_properties properties;
+ unsigned int qid;
+ struct kernel_queue *kq = NULL;
+ int status;
+
+ BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
+
+ status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
+ &properties, 0, KFD_QUEUE_TYPE_DIQ,
+ &qid);
+
+ if (status) {
+ pr_err("amdkfd: Failed to create DIQ\n");
+ return status;
+ }
+
+ pr_debug("DIQ Created with queue id: %d\n", qid);
+
+ kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
+
+ if (kq == NULL) {
+ pr_err("amdkfd: Error getting DIQ\n");
+ pqm_destroy_queue(dbgdev->pqm, qid);
+ return -EFAULT;
+ }
+
+ dbgdev->kq = kq;
+
+ return status;
+}
+
+static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
+{
+ BUG_ON(!dbgdev || !dbgdev->dev);
+
+ /* disable watch address */
+ dbgdev_address_watch_disable_nodiq(dbgdev->dev);
+ return 0;
+}
+
+static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
+{
+ /* todo - disable address watch */
+ int status;
+
+ BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
+
+ status = pqm_destroy_queue(dbgdev->pqm,
+ dbgdev->kq->queue->properties.queue_id);
+ dbgdev->kq = NULL;
+
+ return status;
+}
+
+static void dbgdev_address_watch_set_registers(
+ const struct dbg_address_watch_info *adw_info,
+ union TCP_WATCH_ADDR_H_BITS *addrHi,
+ union TCP_WATCH_ADDR_L_BITS *addrLo,
+ union TCP_WATCH_CNTL_BITS *cntl,
+ unsigned int index, unsigned int vmid)
+{
+ union ULARGE_INTEGER addr;
+
+ BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
+
+ addr.quad_part = 0;
+ addrHi->u32All = 0;
+ addrLo->u32All = 0;
+ cntl->u32All = 0;
+
+ if (adw_info->watch_mask != NULL)
+ cntl->bitfields.mask =
+ (uint32_t) (adw_info->watch_mask[index] &
+ ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
+ else
+ cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
+
+ addr.quad_part = (unsigned long long) adw_info->watch_address[index];
+
+ addrHi->bitfields.addr = addr.u.high_part &
+ ADDRESS_WATCH_REG_ADDHIGH_MASK;
+ addrLo->bitfields.addr =
+ (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
+
+ cntl->bitfields.mode = adw_info->watch_mode[index];
+ cntl->bitfields.vmid = (uint32_t) vmid;
+ /* for now assume it is an ATC address */
+ cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
+
+ pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
+ pr_debug("\t\t%20s %08x\n", "set reg add high :",
+ addrHi->bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "set reg add low :",
+ addrLo->bitfields.addr);
+}
+
+static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
+ struct dbg_address_watch_info *adw_info)
+{
+ union TCP_WATCH_ADDR_H_BITS addrHi;
+ union TCP_WATCH_ADDR_L_BITS addrLo;
+ union TCP_WATCH_CNTL_BITS cntl;
+ struct kfd_process_device *pdd;
+ unsigned int i;
+
+ BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
+
+ /* taking the vmid for that process on the safe way using pdd */
+ pdd = kfd_get_process_device_data(dbgdev->dev,
+ adw_info->process);
+ if (!pdd) {
+ pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+ return -EFAULT;
+ }
+
+ addrHi.u32All = 0;
+ addrLo.u32All = 0;
+ cntl.u32All = 0;
+
+ if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
+ (adw_info->num_watch_points == 0)) {
+ pr_err("amdkfd: num_watch_points is invalid\n");
+ return -EINVAL;
+ }
+
+ if ((adw_info->watch_mode == NULL) ||
+ (adw_info->watch_address == NULL)) {
+ pr_err("amdkfd: adw_info fields are not valid\n");
+ return -EINVAL;
+ }
+
+ for (i = 0 ; i < adw_info->num_watch_points ; i++) {
+ dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
+ &cntl, i, pdd->qpd.vmid);
+
+ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+ pr_debug("\t\t%20s %08x\n", "register index :", i);
+ pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
+ pr_debug("\t\t%20s %08x\n", "Address Low is :",
+ addrLo.bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "Address high is :",
+ addrHi.bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "Address high is :",
+ addrHi.bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "Control Mask is :",
+ cntl.bitfields.mask);
+ pr_debug("\t\t%20s %08x\n", "Control Mode is :",
+ cntl.bitfields.mode);
+ pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
+ cntl.bitfields.vmid);
+ pr_debug("\t\t%20s %08x\n", "Control atc is :",
+ cntl.bitfields.atc);
+ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ pdd->dev->kfd2kgd->address_watch_execute(
+ dbgdev->dev->kgd,
+ i,
+ cntl.u32All,
+ addrHi.u32All,
+ addrLo.u32All);
+ }
+
+ return 0;
+}
+
+static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+ struct dbg_address_watch_info *adw_info)
+{
+ struct pm4__set_config_reg *packets_vec;
+ union TCP_WATCH_ADDR_H_BITS addrHi;
+ union TCP_WATCH_ADDR_L_BITS addrLo;
+ union TCP_WATCH_CNTL_BITS cntl;
+ struct kfd_mem_obj *mem_obj;
+ unsigned int aw_reg_add_dword;
+ uint32_t *packet_buff_uint;
+ unsigned int i;
+ int status;
+ size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
+ /* we do not control the vmid in DIQ mode, just a place holder */
+ unsigned int vmid = 0;
+
+ BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
+
+ addrHi.u32All = 0;
+ addrLo.u32All = 0;
+ cntl.u32All = 0;
+
+ if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
+ (adw_info->num_watch_points == 0)) {
+ pr_err("amdkfd: num_watch_points is invalid\n");
+ return -EINVAL;
+ }
+
+ if ((NULL == adw_info->watch_mode) ||
+ (NULL == adw_info->watch_address)) {
+ pr_err("amdkfd: adw_info fields are not valid\n");
+ return -EINVAL;
+ }
+
+ status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
+
+ if (status != 0) {
+ pr_err("amdkfd: Failed to allocate GART memory\n");
+ return status;
+ }
+
+ packet_buff_uint = mem_obj->cpu_ptr;
+
+ memset(packet_buff_uint, 0, ib_size);
+
+ packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
+
+ packets_vec[0].header.count = 1;
+ packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
+ packets_vec[0].header.type = PM4_TYPE_3;
+ packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
+ packets_vec[0].bitfields2.insert_vmid = 1;
+ packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
+ packets_vec[1].bitfields2.insert_vmid = 0;
+ packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
+ packets_vec[2].bitfields2.insert_vmid = 0;
+ packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
+ packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
+ packets_vec[3].bitfields2.insert_vmid = 1;
+
+ for (i = 0; i < adw_info->num_watch_points; i++) {
+ dbgdev_address_watch_set_registers(adw_info,
+ &addrHi,
+ &addrLo,
+ &cntl,
+ i,
+ vmid);
+
+ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+ pr_debug("\t\t%20s %08x\n", "register index :", i);
+ pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
+ pr_debug("\t\t%20s %p\n", "Add ptr is :",
+ adw_info->watch_address);
+ pr_debug("\t\t%20s %08llx\n", "Add is :",
+ adw_info->watch_address[i]);
+ pr_debug("\t\t%20s %08x\n", "Address Low is :",
+ addrLo.bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "Address high is :",
+ addrHi.bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "Control Mask is :",
+ cntl.bitfields.mask);
+ pr_debug("\t\t%20s %08x\n", "Control Mode is :",
+ cntl.bitfields.mode);
+ pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
+ cntl.bitfields.vmid);
+ pr_debug("\t\t%20s %08x\n", "Control atc is :",
+ cntl.bitfields.atc);
+ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ aw_reg_add_dword =
+ dbgdev->dev->kfd2kgd->address_watch_get_offset(
+ dbgdev->dev->kgd,
+ i,
+ ADDRESS_WATCH_REG_CNTL);
+
+ aw_reg_add_dword /= sizeof(uint32_t);
+
+ packets_vec[0].bitfields2.reg_offset =
+ aw_reg_add_dword - CONFIG_REG_BASE;
+
+ packets_vec[0].reg_data[0] = cntl.u32All;
+
+ aw_reg_add_dword =
+ dbgdev->dev->kfd2kgd->address_watch_get_offset(
+ dbgdev->dev->kgd,
+ i,
+ ADDRESS_WATCH_REG_ADDR_HI);
+
+ aw_reg_add_dword /= sizeof(uint32_t);
+
+ packets_vec[1].bitfields2.reg_offset =
+ aw_reg_add_dword - CONFIG_REG_BASE;
+ packets_vec[1].reg_data[0] = addrHi.u32All;
+
+ aw_reg_add_dword =
+ dbgdev->dev->kfd2kgd->address_watch_get_offset(
+ dbgdev->dev->kgd,
+ i,
+ ADDRESS_WATCH_REG_ADDR_LO);
+
+ aw_reg_add_dword /= sizeof(uint32_t);
+
+ packets_vec[2].bitfields2.reg_offset =
+ aw_reg_add_dword - CONFIG_REG_BASE;
+ packets_vec[2].reg_data[0] = addrLo.u32All;
+
+ /* enable watch flag if address is not zero*/
+ if (adw_info->watch_address[i] > 0)
+ cntl.bitfields.valid = 1;
+ else
+ cntl.bitfields.valid = 0;
+
+ aw_reg_add_dword =
+ dbgdev->dev->kfd2kgd->address_watch_get_offset(
+ dbgdev->dev->kgd,
+ i,
+ ADDRESS_WATCH_REG_CNTL);
+
+ aw_reg_add_dword /= sizeof(uint32_t);
+
+ packets_vec[3].bitfields2.reg_offset =
+ aw_reg_add_dword - CONFIG_REG_BASE;
+ packets_vec[3].reg_data[0] = cntl.u32All;
+
+ status = dbgdev_diq_submit_ib(
+ dbgdev,
+ adw_info->process->pasid,
+ mem_obj->gpu_addr,
+ packet_buff_uint,
+ ib_size);
+
+ if (status != 0) {
+ pr_err("amdkfd: Failed to submit IB to DIQ\n");
+ break;
+ }
+ }
+
+ kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+ return status;
+}
+
+static int dbgdev_wave_control_set_registers(
+ struct dbg_wave_control_info *wac_info,
+ union SQ_CMD_BITS *in_reg_sq_cmd,
+ union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
+{
+ int status;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct HsaDbgWaveMsgAMDGen2 *pMsg;
+
+ BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
+
+ reg_sq_cmd.u32All = 0;
+ reg_gfx_index.u32All = 0;
+ pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
+
+ switch (wac_info->mode) {
+ /* Send command to single wave */
+ case HSA_DBG_WAVEMODE_SINGLE:
+ /*
+ * Limit access to the process waves only,
+ * by setting vmid check
+ */
+ reg_sq_cmd.bits.check_vmid = 1;
+ reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
+ reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
+
+ reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
+ reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
+ reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
+
+ break;
+
+ /* Send command to all waves with matching VMID */
+ case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
+
+ reg_gfx_index.bits.sh_broadcast_writes = 1;
+ reg_gfx_index.bits.se_broadcast_writes = 1;
+ reg_gfx_index.bits.instance_broadcast_writes = 1;
+
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+
+ break;
+
+ /* Send command to all CU waves with matching VMID */
+ case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
+
+ reg_sq_cmd.bits.check_vmid = 1;
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+
+ reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
+ reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
+ reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
+
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ switch (wac_info->operand) {
+ case HSA_DBG_WAVEOP_HALT:
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
+ break;
+
+ case HSA_DBG_WAVEOP_RESUME:
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
+ break;
+
+ case HSA_DBG_WAVEOP_KILL:
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
+ break;
+
+ case HSA_DBG_WAVEOP_DEBUG:
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
+ break;
+
+ case HSA_DBG_WAVEOP_TRAP:
+ if (wac_info->trapId < MAX_TRAPID) {
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
+ reg_sq_cmd.bits.trap_id = wac_info->trapId;
+ } else {
+ status = -EINVAL;
+ }
+ break;
+
+ default:
+ status = -EINVAL;
+ break;
+ }
+
+ if (status == 0) {
+ *in_reg_sq_cmd = reg_sq_cmd;
+ *in_reg_gfx_index = reg_gfx_index;
+ }
+
+ return status;
+}
+
+static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
+ struct dbg_wave_control_info *wac_info)
+{
+
+ int status;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct kfd_mem_obj *mem_obj;
+ uint32_t *packet_buff_uint;
+ struct pm4__set_config_reg *packets_vec;
+ size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
+
+ BUG_ON(!dbgdev || !wac_info);
+
+ reg_sq_cmd.u32All = 0;
+
+ status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
+ &reg_gfx_index);
+ if (status) {
+ pr_err("amdkfd: Failed to set wave control registers\n");
+ return status;
+ }
+
+ /* we do not control the VMID in DIQ,so reset it to a known value */
+ reg_sq_cmd.bits.vm_id = 0;
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ pr_debug("\t\t mode is: %u\n", wac_info->mode);
+ pr_debug("\t\t operand is: %u\n", wac_info->operand);
+ pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
+ pr_debug("\t\t msg value is: %u\n",
+ wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+ pr_debug("\t\t vmid is: N/A\n");
+
+ pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
+ pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
+ pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
+ pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
+ pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
+ pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
+ pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
+
+ pr_debug("\t\t ibw is : %u\n",
+ reg_gfx_index.bitfields.instance_broadcast_writes);
+ pr_debug("\t\t ii is : %u\n",
+ reg_gfx_index.bitfields.instance_index);
+ pr_debug("\t\t sebw is : %u\n",
+ reg_gfx_index.bitfields.se_broadcast_writes);
+ pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
+ pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
+ pr_debug("\t\t sbw is : %u\n",
+ reg_gfx_index.bitfields.sh_broadcast_writes);
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
+
+ if (status != 0) {
+ pr_err("amdkfd: Failed to allocate GART memory\n");
+ return status;
+ }
+
+ packet_buff_uint = mem_obj->cpu_ptr;
+
+ memset(packet_buff_uint, 0, ib_size);
+
+ packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
+ packets_vec[0].header.count = 1;
+ packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
+ packets_vec[0].header.type = PM4_TYPE_3;
+ packets_vec[0].bitfields2.reg_offset =
+ GRBM_GFX_INDEX / (sizeof(uint32_t)) -
+ USERCONFIG_REG_BASE;
+
+ packets_vec[0].bitfields2.insert_vmid = 0;
+ packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
+
+ packets_vec[1].header.count = 1;
+ packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
+ packets_vec[1].header.type = PM4_TYPE_3;
+ packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
+ CONFIG_REG_BASE;
+
+ packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
+ packets_vec[1].bitfields2.insert_vmid = 1;
+ packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
+
+ /* Restore the GRBM_GFX_INDEX register */
+
+ reg_gfx_index.u32All = 0;
+ reg_gfx_index.bits.sh_broadcast_writes = 1;
+ reg_gfx_index.bits.instance_broadcast_writes = 1;
+ reg_gfx_index.bits.se_broadcast_writes = 1;
+
+
+ packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
+ packets_vec[2].bitfields2.reg_offset =
+ GRBM_GFX_INDEX / (sizeof(uint32_t)) -
+ USERCONFIG_REG_BASE;
+
+ packets_vec[2].bitfields2.insert_vmid = 0;
+ packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
+
+ status = dbgdev_diq_submit_ib(
+ dbgdev,
+ wac_info->process->pasid,
+ mem_obj->gpu_addr,
+ packet_buff_uint,
+ ib_size);
+
+ if (status != 0)
+ pr_err("amdkfd: Failed to submit IB to DIQ\n");
+
+ kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+
+ return status;
+}
+
+static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
+ struct dbg_wave_control_info *wac_info)
+{
+ int status;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct kfd_process_device *pdd;
+
+ BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
+
+ reg_sq_cmd.u32All = 0;
+
+ /* taking the VMID for that process on the safe way using PDD */
+ pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
+
+ if (!pdd) {
+ pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+ return -EFAULT;
+ }
+ status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
+ &reg_gfx_index);
+ if (status) {
+ pr_err("amdkfd: Failed to set wave control registers\n");
+ return status;
+ }
+
+ /* for non DIQ we need to patch the VMID: */
+
+ reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ pr_debug("\t\t mode is: %u\n", wac_info->mode);
+ pr_debug("\t\t operand is: %u\n", wac_info->operand);
+ pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
+ pr_debug("\t\t msg value is: %u\n",
+ wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+ pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);
+
+ pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
+ pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
+ pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
+ pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
+ pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
+ pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
+ pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
+
+ pr_debug("\t\t ibw is : %u\n",
+ reg_gfx_index.bitfields.instance_broadcast_writes);
+ pr_debug("\t\t ii is : %u\n",
+ reg_gfx_index.bitfields.instance_index);
+ pr_debug("\t\t sebw is : %u\n",
+ reg_gfx_index.bitfields.se_broadcast_writes);
+ pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
+ pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
+ pr_debug("\t\t sbw is : %u\n",
+ reg_gfx_index.bitfields.sh_broadcast_writes);
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
+ reg_gfx_index.u32All,
+ reg_sq_cmd.u32All);
+}
+
+int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
+{
+ int status = 0;
+ unsigned int vmid;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct kfd_process_device *pdd;
+ struct dbg_wave_control_info wac_info;
+ int temp;
+ int first_vmid_to_scan = 8;
+ int last_vmid_to_scan = 15;
+
+ first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
+ temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
+ last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
+
+ reg_sq_cmd.u32All = 0;
+ status = 0;
+
+ wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
+ wac_info.operand = HSA_DBG_WAVEOP_KILL;
+
+ pr_debug("Killing all process wavefronts\n");
+
+ /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
+ * ATC_VMID15_PASID_MAPPING
+ * to check which VMID the current process is mapped to. */
+
+ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+ if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
+ (dev->kgd, vmid)) {
+ if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
+ (dev->kgd, vmid) == p->pasid) {
+ pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
+ vmid, p->pasid);
+ break;
+ }
+ }
+ }
+
+ if (vmid > last_vmid_to_scan) {
+ pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
+ return -EFAULT;
+ }
+
+ /* taking the VMID for that process on the safe way using PDD */
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd)
+ return -EFAULT;
+
+ status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
+ &reg_gfx_index);
+ if (status != 0)
+ return -EINVAL;
+
+ /* for non DIQ we need to patch the VMID: */
+ reg_sq_cmd.bits.vm_id = vmid;
+
+ dev->kfd2kgd->wave_control_execute(dev->kgd,
+ reg_gfx_index.u32All,
+ reg_sq_cmd.u32All);
+
+ return 0;
+}
+
+void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
+ enum DBGDEV_TYPE type)
+{
+ BUG_ON(!pdbgdev || !pdev);
+
+ pdbgdev->dev = pdev;
+ pdbgdev->kq = NULL;
+ pdbgdev->type = type;
+ pdbgdev->pqm = NULL;
+
+ switch (type) {
+ case DBGDEV_TYPE_NODIQ:
+ pdbgdev->dbgdev_register = dbgdev_register_nodiq;
+ pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
+ pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
+ pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
+ break;
+ case DBGDEV_TYPE_DIQ:
+ default:
+ pdbgdev->dbgdev_register = dbgdev_register_diq;
+ pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
+ pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
+ pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
+ break;
+ }
+
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
new file mode 100644
index 000000000000..4b0dd5aa5306
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_DBGDEV_H_
+#define KFD_DBGDEV_H_
+
+enum {
+ SQ_CMD_VMID_OFFSET = 28,
+ ADDRESS_WATCH_CNTL_OFFSET = 24
+};
+
+enum {
+ PRIV_QUEUE_SYNC_TIME_MS = 200
+};
+
+/* CONTEXT reg space definition */
+enum {
+ CONTEXT_REG_BASE = 0xA000,
+ CONTEXT_REG_END = 0xA400,
+ CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE
+};
+
+/* USER CONFIG reg space definition */
+enum {
+ USERCONFIG_REG_BASE = 0xC000,
+ USERCONFIG_REG_END = 0x10000,
+ USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE
+};
+
+/* CONFIG reg space definition */
+enum {
+ CONFIG_REG_BASE = 0x2000, /* in dwords */
+ CONFIG_REG_END = 0x2B00,
+ CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE
+};
+
+/* SH reg space definition */
+enum {
+ SH_REG_BASE = 0x2C00,
+ SH_REG_END = 0x3000,
+ SH_REG_SIZE = SH_REG_END - SH_REG_BASE
+};
+
+enum SQ_IND_CMD_CMD {
+ SQ_IND_CMD_CMD_NULL = 0x00000000,
+ SQ_IND_CMD_CMD_HALT = 0x00000001,
+ SQ_IND_CMD_CMD_RESUME = 0x00000002,
+ SQ_IND_CMD_CMD_KILL = 0x00000003,
+ SQ_IND_CMD_CMD_DEBUG = 0x00000004,
+ SQ_IND_CMD_CMD_TRAP = 0x00000005,
+};
+
+enum SQ_IND_CMD_MODE {
+ SQ_IND_CMD_MODE_SINGLE = 0x00000000,
+ SQ_IND_CMD_MODE_BROADCAST = 0x00000001,
+ SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002,
+ SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003,
+ SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004,
+};
+
+union SQ_IND_INDEX_BITS {
+ struct {
+ uint32_t wave_id:4;
+ uint32_t simd_id:2;
+ uint32_t thread_id:6;
+ uint32_t:1;
+ uint32_t force_read:1;
+ uint32_t read_timeout:1;
+ uint32_t unindexed:1;
+ uint32_t index:16;
+
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union SQ_IND_CMD_BITS {
+ struct {
+ uint32_t data:32;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union SQ_CMD_BITS {
+ struct {
+ uint32_t cmd:3;
+ uint32_t:1;
+ uint32_t mode:3;
+ uint32_t check_vmid:1;
+ uint32_t trap_id:3;
+ uint32_t:5;
+ uint32_t wave_id:4;
+ uint32_t simd_id:2;
+ uint32_t:2;
+ uint32_t queue_id:3;
+ uint32_t:1;
+ uint32_t vm_id:4;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union SQ_IND_DATA_BITS {
+ struct {
+ uint32_t data:32;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union GRBM_GFX_INDEX_BITS {
+ struct {
+ uint32_t instance_index:8;
+ uint32_t sh_index:8;
+ uint32_t se_index:8;
+ uint32_t:5;
+ uint32_t sh_broadcast_writes:1;
+ uint32_t instance_broadcast_writes:1;
+ uint32_t se_broadcast_writes:1;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union TCP_WATCH_ADDR_H_BITS {
+ struct {
+ uint32_t addr:16;
+ uint32_t:16;
+
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union TCP_WATCH_ADDR_L_BITS {
+ struct {
+ uint32_t:6;
+ uint32_t addr:26;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+enum {
+ QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
+ QUEUESTATE__ACTIVE_COMPLETION_PENDING,
+ QUEUESTATE__ACTIVE
+};
+
+union ULARGE_INTEGER {
+ struct {
+ uint32_t low_part;
+ uint32_t high_part;
+ } u;
+ unsigned long long quad_part;
+};
+
+
+#define KFD_CIK_VMID_START_OFFSET (8)
+#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8))
+
+
+void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
+ enum DBGDEV_TYPE type);
+
+#endif /* KFD_DBGDEV_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
new file mode 100644
index 000000000000..56d676396342
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#include "kfd_priv.h"
+#include "cik_regs.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_diq.h"
+#include "kfd_dbgmgr.h"
+#include "kfd_dbgdev.h"
+
+static DEFINE_MUTEX(kfd_dbgmgr_mutex);
+
+struct mutex *kfd_get_dbgmgr_mutex(void)
+{
+ return &kfd_dbgmgr_mutex;
+}
+
+
+static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
+{
+ BUG_ON(!pmgr);
+
+ kfree(pmgr->dbgdev);
+
+ pmgr->dbgdev = NULL;
+ pmgr->pasid = 0;
+ pmgr->dev = NULL;
+}
+
+void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
+{
+ if (pmgr != NULL) {
+ kfd_dbgmgr_uninitialize(pmgr);
+ kfree(pmgr);
+ }
+}
+
+bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
+{
+ enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
+ struct kfd_dbgmgr *new_buff;
+
+ BUG_ON(pdev == NULL);
+ BUG_ON(!pdev->init_complete);
+
+ new_buff = kfd_alloc_struct(new_buff);
+ if (!new_buff) {
+ pr_err("amdkfd: Failed to allocate dbgmgr instance\n");
+ return false;
+ }
+
+ new_buff->pasid = 0;
+ new_buff->dev = pdev;
+ new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
+ if (!new_buff->dbgdev) {
+ pr_err("amdkfd: Failed to allocate dbgdev instance\n");
+ kfree(new_buff);
+ return false;
+ }
+
+ /* get actual type of DBGDevice cpsch or not */
+ if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ type = DBGDEV_TYPE_NODIQ;
+
+ kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
+ *ppmgr = new_buff;
+
+ return true;
+}
+
+long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
+{
+ BUG_ON(!p || !pmgr || !pmgr->dbgdev);
+
+ if (pmgr->pasid != 0) {
+ pr_debug("H/W debugger is already active using pasid %d\n",
+ pmgr->pasid);
+ return -EBUSY;
+ }
+
+ /* remember pasid */
+ pmgr->pasid = p->pasid;
+
+ /* provide the pqm for diq generation */
+ pmgr->dbgdev->pqm = &p->pqm;
+
+ /* activate the actual registering */
+ pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);
+
+ return 0;
+}
+
+long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
+{
+ BUG_ON(!p || !pmgr || !pmgr->dbgdev);
+
+ /* Is the requests coming from the already registered process? */
+ if (pmgr->pasid != p->pasid) {
+ pr_debug("H/W debugger is not registered by calling pasid %d\n",
+ p->pasid);
+ return -EINVAL;
+ }
+
+ pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);
+
+ pmgr->pasid = 0;
+
+ return 0;
+}
+
+long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
+ struct dbg_wave_control_info *wac_info)
+{
+ BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info);
+
+ /* Is the requests coming from the already registered process? */
+ if (pmgr->pasid != wac_info->process->pasid) {
+ pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+ wac_info->process->pasid);
+ return -EINVAL;
+ }
+
+ return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info);
+}
+
+long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
+ struct dbg_address_watch_info *adw_info)
+{
+ BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info);
+
+
+ /* Is the requests coming from the already registered process? */
+ if (pmgr->pasid != adw_info->process->pasid) {
+ pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+ adw_info->process->pasid);
+ return -EINVAL;
+ }
+
+ return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev,
+ adw_info);
+}
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
new file mode 100644
index 000000000000..257a745ad0b5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
@@ -0,0 +1,294 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_DBGMGR_H_
+#define KFD_DBGMGR_H_
+
+#include "kfd_priv.h"
+
+/* must align with hsakmttypes definition */
+#pragma pack(push, 4)
+
+enum HSA_DBG_WAVEOP {
+ HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */
+ HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
+ HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */
+ HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter
+ debug mode */
+ HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take
+ a trap */
+ HSA_DBG_NUM_WAVEOP = 5,
+ HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WAVEMODE {
+ /* send command to a single wave */
+ HSA_DBG_WAVEMODE_SINGLE = 0,
+ /*
+ * Broadcast to all wavefronts of all processes is not
+ * supported for HSA user mode
+ */
+
+ /* send to waves within current process */
+ HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2,
+ /* send to waves within current process on CU */
+ HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,
+ HSA_DBG_NUM_WAVEMODE = 3,
+ HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WAVEMSG_TYPE {
+ HSA_DBG_WAVEMSG_AUTO = 0,
+ HSA_DBG_WAVEMSG_USER = 1,
+ HSA_DBG_WAVEMSG_ERROR = 2,
+ HSA_DBG_NUM_WAVEMSG,
+ HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WATCH_MODE {
+ HSA_DBG_WATCH_READ = 0, /* Read operations only */
+ HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */
+ HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */
+ HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */
+ HSA_DBG_WATCH_NUM,
+ HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
+};
+
+/* This structure is hardware specific and may change in the future */
+struct HsaDbgWaveMsgAMDGen2 {
+ union {
+ struct ui32 {
+ uint32_t UserData:8; /* user data */
+ uint32_t ShaderArray:1; /* Shader array */
+ uint32_t Priv:1; /* Privileged */
+ uint32_t Reserved0:4; /* This field is reserved,
+ should be 0 */
+ uint32_t WaveId:4; /* wave id */
+ uint32_t SIMD:2; /* SIMD id */
+ uint32_t HSACU:4; /* Compute unit */
+ uint32_t ShaderEngine:2;/* Shader engine */
+ uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */
+ uint32_t Reserved1:4; /* This field is reserved,
+ should be 0 */
+ } ui32;
+ uint32_t Value;
+ };
+ uint32_t Reserved2;
+};
+
+union HsaDbgWaveMessageAMD {
+ struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
+ /* for future HsaDbgWaveMsgAMDGen3; */
+};
+
+struct HsaDbgWaveMessage {
+ void *MemoryVA; /* ptr to associated host-accessible data */
+ union HsaDbgWaveMessageAMD DbgWaveMsg;
+};
+
+/*
+ * TODO: This definitions to be MOVED to kfd_event, once it is implemented.
+ *
+ * HSA sync primitive, Event and HW Exception notification API definitions.
+ * The API functions allow the runtime to define a so-called sync-primitive,
+ * a SW object combining a user-mode provided "syncvar" and a scheduler event
+ * that can be signaled through a defined GPU interrupt. A syncvar is
+ * a process virtual memory location of a certain size that can be accessed
+ * by CPU and GPU shader code within the process to set and query the content
+ * within that memory. The definition of the content is determined by the HSA
+ * runtime and potentially GPU shader code interfacing with the HSA runtime.
+ * The syncvar values may be commonly written through an PM4 WRITE_DATA packet
+ * in the user mode instruction stream. The OS scheduler event is typically
+ * associated and signaled by an interrupt issued by the GPU, but other HSA
+ * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
+ * by the KFD by this mechanism, too. */
+
+/* these are the new definitions for events */
+enum HSA_EVENTTYPE {
+ HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */
+ HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */
+ HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change
+ (start/stop) */
+ HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */
+ HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */
+ HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */
+ HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
+ HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state
+ (EOP pm4) */
+ /* ... */
+ HSA_EVENTTYPE_MAXID,
+ HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
+};
+
+/* Sub-definitions for various event types: Syncvar */
+struct HsaSyncVar {
+ union SyncVar {
+ void *UserData; /* pointer to user mode data */
+ uint64_t UserDataPtrValue; /* 64bit compatibility of value */
+ } SyncVar;
+ uint64_t SyncVarSize;
+};
+
+/* Sub-definitions for various event types: NodeChange */
+
+enum HSA_EVENTTYPE_NODECHANGE_FLAGS {
+ HSA_EVENTTYPE_NODECHANGE_ADD = 0,
+ HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
+ HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
+};
+
+struct HsaNodeChange {
+ /* HSA node added/removed on the platform */
+ enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;
+};
+
+/* Sub-definitions for various event types: DeviceStateChange */
+enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
+ /* device started (and available) */
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,
+ /* device stopped (i.e. unavailable) */
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1,
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
+};
+
+enum HSA_DEVICE {
+ HSA_DEVICE_CPU = 0,
+ HSA_DEVICE_GPU = 1,
+ MAX_HSA_DEVICE = 2
+};
+
+struct HsaDeviceStateChange {
+ uint32_t NodeId; /* F-NUMA node that contains the device */
+ enum HSA_DEVICE Device; /* device type: GPU or CPU */
+ enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
+};
+
+struct HsaEventData {
+ enum HSA_EVENTTYPE EventType; /* event type */
+ union EventData {
+ /*
+ * return data associated with HSA_EVENTTYPE_SIGNAL
+ * and other events
+ */
+ struct HsaSyncVar SyncVar;
+
+ /* data associated with HSA_EVENTTYPE_NODE_CHANGE */
+ struct HsaNodeChange NodeChangeState;
+
+ /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */
+ struct HsaDeviceStateChange DeviceState;
+ } EventData;
+
+ /* the following data entries are internal to the KFD & thunk itself */
+
+ /* internal thunk store for Event data (OsEventHandle) */
+ uint64_t HWData1;
+ /* internal thunk store for Event data (HWAddress) */
+ uint64_t HWData2;
+ /* internal thunk store for Event data (HWData) */
+ uint32_t HWData3;
+};
+
+struct HsaEventDescriptor {
+ /* event type to allocate */
+ enum HSA_EVENTTYPE EventType;
+ /* H-NUMA node containing GPU device that is event source */
+ uint32_t NodeId;
+ /* pointer to user mode syncvar data, syncvar->UserDataPtrValue
+ * may be NULL
+ */
+ struct HsaSyncVar SyncVar;
+};
+
+struct HsaEvent {
+ uint32_t EventId;
+ struct HsaEventData EventData;
+};
+
+#pragma pack(pop)
+
+enum DBGDEV_TYPE {
+ DBGDEV_TYPE_ILLEGAL = 0,
+ DBGDEV_TYPE_NODIQ = 1,
+ DBGDEV_TYPE_DIQ = 2,
+ DBGDEV_TYPE_TEST = 3
+};
+
+struct dbg_address_watch_info {
+ struct kfd_process *process;
+ enum HSA_DBG_WATCH_MODE *watch_mode;
+ uint64_t *watch_address;
+ uint64_t *watch_mask;
+ struct HsaEvent *watch_event;
+ uint32_t num_watch_points;
+};
+
+struct dbg_wave_control_info {
+ struct kfd_process *process;
+ uint32_t trapId;
+ enum HSA_DBG_WAVEOP operand;
+ enum HSA_DBG_WAVEMODE mode;
+ struct HsaDbgWaveMessage dbgWave_msg;
+};
+
+struct kfd_dbgdev {
+
+ /* The device that owns this data. */
+ struct kfd_dev *dev;
+
+ /* kernel queue for DIQ */
+ struct kernel_queue *kq;
+
+ /* a pointer to the pqm of the calling process */
+ struct process_queue_manager *pqm;
+
+ /* type of debug device ( DIQ, non DIQ, etc. ) */
+ enum DBGDEV_TYPE type;
+
+ /* virtualized function pointers to device dbg */
+ int (*dbgdev_register)(struct kfd_dbgdev *dbgdev);
+ int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev);
+ int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev,
+ struct dbg_address_watch_info *adw_info);
+ int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev,
+ struct dbg_wave_control_info *wac_info);
+
+};
+
+struct kfd_dbgmgr {
+ unsigned int pasid;
+ struct kfd_dev *dev;
+ struct kfd_dbgdev *dbgdev;
+};
+
+/* prototypes for debug manager functions */
+struct mutex *kfd_get_dbgmgr_mutex(void);
+void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr);
+bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev);
+long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
+long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
+long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
+ struct dbg_wave_control_info *wac_info);
+long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
+ struct dbg_address_watch_info *adw_info);
+#endif /* KFD_DBGMGR_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 52cab0f53ebc..1d1e2e952a79 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -33,8 +33,11 @@
static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.max_pasid_bits = 16,
+ /* max num of queues for KV.TODO should be a dynamic value */
+ .max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED
};
@@ -294,6 +297,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto dqm_start_error;
}
+ kfd->dbgmgr = NULL;
+
kfd->init_complete = true;
dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
kfd->pdev->device);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 4e215bd4d41f..547b0a589693 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+ bool preempt_static_queues, bool lock);
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
@@ -775,7 +776,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
BUG_ON(!dqm);
- destroy_queues_cpsch(dqm, true);
+ destroy_queues_cpsch(dqm, true, true);
list_for_each_entry(node, &dqm->queues, list) {
pdd = qpd_to_pdd(node->qpd);
@@ -829,7 +830,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
pr_debug("kfd: In %s\n", __func__);
mutex_lock(&dqm->lock);
- destroy_queues_cpsch(dqm, false);
+ /* here we actually preempt the DIQ */
+ destroy_queues_cpsch(dqm, true, false);
list_del(&kq->list);
dqm->queue_count--;
qpd->is_debug = false;
@@ -913,7 +915,7 @@ out:
return retval;
}
-static int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
+int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
unsigned int fence_value,
unsigned long timeout)
{
@@ -935,13 +937,16 @@ static int destroy_sdma_queues(struct device_queue_manager *dqm,
unsigned int sdma_engine)
{
return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
- KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+ KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
sdma_engine);
}
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+ bool preempt_static_queues, bool lock)
{
int retval;
+ enum kfd_preempt_type_filter preempt_type;
+ struct kfd_process *p;
BUG_ON(!dqm);
@@ -960,8 +965,12 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
destroy_sdma_queues(dqm, 1);
}
+ preempt_type = preempt_static_queues ?
+ KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
+ KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;
+
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
- KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
+ preempt_type, 0, false, 0);
if (retval != 0)
goto out;
@@ -969,8 +978,13 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
KFD_FENCE_COMPLETED);
/* should be timed out */
- amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
+ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+ if (retval != 0) {
+ p = kfd_get_process(current);
+ p->reset_wavefronts = true;
+ goto out;
+ }
pm_release_ib(&dqm->packets);
dqm->active_runlist = false;
@@ -989,7 +1003,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
if (lock)
mutex_lock(&dqm->lock);
- retval = destroy_queues_cpsch(dqm, false);
+ retval = destroy_queues_cpsch(dqm, false, false);
if (retval != 0) {
pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
goto out;
@@ -1024,13 +1038,27 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
{
int retval;
struct mqd_manager *mqd;
+ bool preempt_all_queues;
BUG_ON(!dqm || !qpd || !q);
+ preempt_all_queues = false;
+
retval = 0;
/* remove queue from list to prevent rescheduling after preemption */
mutex_lock(&dqm->lock);
+
+ if (qpd->is_debug) {
+ /*
+ * error, currently we do not allow to destroy a queue
+ * of a currently debugged process
+ */
+ retval = -EBUSY;
+ goto failed_try_destroy_debugged_queue;
+
+ }
+
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
@@ -1062,6 +1090,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
return 0;
failed:
+failed_try_destroy_debugged_queue:
+
mutex_unlock(&dqm->lock);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 57278e2d72e0..ec4036a09f3e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -88,9 +88,11 @@ struct device_queue_manager_ops {
struct queue *q,
struct qcm_process_device *qpd,
int *allocate_vmid);
+
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q);
+
int (*update_queue)(struct device_queue_manager *dqm,
struct queue *q);
@@ -100,8 +102,10 @@ struct device_queue_manager_ops {
int (*register_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
+
int (*unregister_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
+
int (*initialize)(struct device_queue_manager *dqm);
int (*start)(struct device_queue_manager *dqm);
int (*stop)(struct device_queue_manager *dqm);
@@ -109,9 +113,11 @@ struct device_queue_manager_ops {
int (*create_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
+
void (*destroy_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
+
bool (*set_cache_memory_policy)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index e2533d875f43..99b6d28a11c3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -163,7 +163,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
num_queues = 0;
list_for_each_entry(cur, &qpd->queues_list, list)
num_queues++;
- packet->bitfields10.num_queues = num_queues;
+ packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues;
packet->sh_mem_config = qpd->sh_mem_config;
packet->sh_mem_bases = qpd->sh_mem_bases;
@@ -177,9 +177,10 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
}
static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
- struct queue *q)
+ struct queue *q, bool is_static)
{
struct pm4_map_queues *packet;
+ bool use_static = is_static;
BUG_ON(!pm || !buffer || !q);
@@ -209,6 +210,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
case KFD_QUEUE_TYPE_SDMA:
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__sdma0;
+ use_static = false; /* no static queues under SDMA */
break;
default:
BUG();
@@ -218,6 +220,9 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
q->properties.doorbell_off;
+ packet->mes_map_queues_ordinals[0].bitfields3.is_static =
+ (use_static == true) ? 1 : 0;
+
packet->mes_map_queues_ordinals[0].mqd_addr_lo =
lower_32_bits(q->gart_mqd_addr);
@@ -271,9 +276,11 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pm_release_ib(pm);
return -ENOMEM;
}
+
retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
if (retval != 0)
return retval;
+
proccesses_mapped++;
inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
alloc_size_bytes);
@@ -281,23 +288,36 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
list_for_each_entry(kq, &qpd->priv_queue_list, list) {
if (kq->queue->properties.is_active != true)
continue;
+
+ pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
+ kq->queue->queue, qpd->is_debug);
+
retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
- kq->queue);
+ kq->queue, qpd->is_debug);
if (retval != 0)
return retval;
- inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
- alloc_size_bytes);
+
+ inc_wptr(&rl_wptr,
+ sizeof(struct pm4_map_queues),
+ alloc_size_bytes);
}
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->properties.is_active != true)
continue;
- retval = pm_create_map_queue(pm,
- &rl_buffer[rl_wptr], q);
+
+ pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
+ q->queue, qpd->is_debug);
+
+ retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
+ q, qpd->is_debug);
+
if (retval != 0)
return retval;
- inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
- alloc_size_bytes);
+
+ inc_wptr(&rl_wptr,
+ sizeof(struct pm4_map_queues),
+ alloc_size_bytes);
}
}
@@ -488,7 +508,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
packet = (struct pm4_unmap_queues *)buffer;
memset(buffer, 0, sizeof(struct pm4_unmap_queues));
-
+ pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
+ mode, reset, type);
packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_unmap_queues));
switch (type) {
@@ -529,6 +550,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
break;
+ case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
+ /* in this case, we do not preempt static queues */
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only;
+ break;
default:
BUG();
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
index 071ad5724bd2..5b393f3e34a9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -237,7 +237,8 @@ struct pm4_map_queues {
struct {
union {
struct {
- uint32_t reserved5:2;
+ uint32_t is_static:1;
+ uint32_t reserved5:1;
uint32_t doorbell_offset:21;
uint32_t reserved6:3;
uint32_t queue:6;
@@ -328,7 +329,8 @@ enum unmap_queues_action_enum {
enum unmap_queues_queue_sel_enum {
queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
- queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2
+ queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2,
+ queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3
};
enum unmap_queues_engine_sel_enum {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
new file mode 100644
index 000000000000..a0ff34878163
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
@@ -0,0 +1,290 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_PM4_HEADERS_DIQ_H_
+#define KFD_PM4_HEADERS_DIQ_H_
+
+/*--------------------_INDIRECT_BUFFER-------------------- */
+
+#ifndef _PM4__INDIRECT_BUFFER_DEFINED
+#define _PM4__INDIRECT_BUFFER_DEFINED
+enum _INDIRECT_BUFFER_cache_policy_enum {
+ cache_policy___indirect_buffer__lru = 0,
+ cache_policy___indirect_buffer__stream = 1,
+ cache_policy___indirect_buffer__bypass = 2
+};
+
+enum {
+ IT_INDIRECT_BUFFER_PASID = 0x5C
+};
+
+struct pm4__indirect_buffer_pasid {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int reserved1:2;
+ unsigned int ib_base_lo:30;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int ib_base_hi:16;
+ unsigned int reserved2:16;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ union {
+ unsigned int control;
+ unsigned int ordinal4;
+ };
+
+ union {
+ struct {
+ unsigned int pasid:10;
+ unsigned int reserved4:22;
+ } bitfields5;
+ unsigned int ordinal5;
+ };
+
+};
+
+#endif
+
+/*--------------------_RELEASE_MEM-------------------- */
+
+#ifndef _PM4__RELEASE_MEM_DEFINED
+#define _PM4__RELEASE_MEM_DEFINED
+enum _RELEASE_MEM_event_index_enum {
+ event_index___release_mem__end_of_pipe = 5,
+ event_index___release_mem__shader_done = 6
+};
+
+enum _RELEASE_MEM_cache_policy_enum {
+ cache_policy___release_mem__lru = 0,
+ cache_policy___release_mem__stream = 1,
+ cache_policy___release_mem__bypass = 2
+};
+
+enum _RELEASE_MEM_dst_sel_enum {
+ dst_sel___release_mem__memory_controller = 0,
+ dst_sel___release_mem__tc_l2 = 1,
+ dst_sel___release_mem__queue_write_pointer_register = 2,
+ dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum _RELEASE_MEM_int_sel_enum {
+ int_sel___release_mem__none = 0,
+ int_sel___release_mem__send_interrupt_only = 1,
+ int_sel___release_mem__send_interrupt_after_write_confirm = 2,
+ int_sel___release_mem__send_data_after_write_confirm = 3
+};
+
+enum _RELEASE_MEM_data_sel_enum {
+ data_sel___release_mem__none = 0,
+ data_sel___release_mem__send_32_bit_low = 1,
+ data_sel___release_mem__send_64_bit_data = 2,
+ data_sel___release_mem__send_gpu_clock_counter = 3,
+ data_sel___release_mem__send_cp_perfcounter_hi_lo = 4,
+ data_sel___release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4__release_mem {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int event_type:6;
+ unsigned int reserved1:2;
+ enum _RELEASE_MEM_event_index_enum event_index:4;
+ unsigned int tcl1_vol_action_ena:1;
+ unsigned int tc_vol_action_ena:1;
+ unsigned int reserved2:1;
+ unsigned int tc_wb_action_ena:1;
+ unsigned int tcl1_action_ena:1;
+ unsigned int tc_action_ena:1;
+ unsigned int reserved3:6;
+ unsigned int atc:1;
+ enum _RELEASE_MEM_cache_policy_enum cache_policy:2;
+ unsigned int reserved4:5;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int reserved5:16;
+ enum _RELEASE_MEM_dst_sel_enum dst_sel:2;
+ unsigned int reserved6:6;
+ enum _RELEASE_MEM_int_sel_enum int_sel:3;
+ unsigned int reserved7:2;
+ enum _RELEASE_MEM_data_sel_enum data_sel:3;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ unsigned int reserved8:2;
+ unsigned int address_lo_32b:30;
+ } bitfields4;
+ struct {
+ unsigned int reserved9:3;
+ unsigned int address_lo_64b:29;
+ } bitfields5;
+ unsigned int ordinal4;
+ };
+
+ unsigned int address_hi;
+
+ unsigned int data_lo;
+
+ unsigned int data_hi;
+
+};
+#endif
+
+
+/*--------------------_SET_CONFIG_REG-------------------- */
+
+#ifndef _PM4__SET_CONFIG_REG_DEFINED
+#define _PM4__SET_CONFIG_REG_DEFINED
+
+struct pm4__set_config_reg {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int reg_offset:16;
+ unsigned int reserved1:7;
+ unsigned int vmid_shift:5;
+ unsigned int insert_vmid:1;
+ unsigned int reserved2:3;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ unsigned int reg_data[1]; /*1..N of these fields */
+
+};
+#endif
+
+/*--------------------_WAIT_REG_MEM-------------------- */
+
+#ifndef _PM4__WAIT_REG_MEM_DEFINED
+#define _PM4__WAIT_REG_MEM_DEFINED
+enum _WAIT_REG_MEM_function_enum {
+ function___wait_reg_mem__always_pass = 0,
+ function___wait_reg_mem__less_than_ref_value = 1,
+ function___wait_reg_mem__less_than_equal_to_the_ref_value = 2,
+ function___wait_reg_mem__equal_to_the_reference_value = 3,
+ function___wait_reg_mem__not_equal_reference_value = 4,
+ function___wait_reg_mem__greater_than_or_equal_reference_value = 5,
+ function___wait_reg_mem__greater_than_reference_value = 6,
+ function___wait_reg_mem__reserved = 7
+};
+
+enum _WAIT_REG_MEM_mem_space_enum {
+ mem_space___wait_reg_mem__register_space = 0,
+ mem_space___wait_reg_mem__memory_space = 1
+};
+
+enum _WAIT_REG_MEM_operation_enum {
+ operation___wait_reg_mem__wait_reg_mem = 0,
+ operation___wait_reg_mem__wr_wait_wr_reg = 1
+};
+
+struct pm4__wait_reg_mem {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ enum _WAIT_REG_MEM_function_enum function:3;
+ unsigned int reserved1:1;
+ enum _WAIT_REG_MEM_mem_space_enum mem_space:2;
+ enum _WAIT_REG_MEM_operation_enum operation:2;
+ unsigned int reserved2:24;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int reserved3:2;
+ unsigned int memory_poll_addr_lo:30;
+ } bitfields3;
+ struct {
+ unsigned int register_poll_addr:16;
+ unsigned int reserved4:16;
+ } bitfields4;
+ struct {
+ unsigned int register_write_addr:16;
+ unsigned int reserved5:16;
+ } bitfields5;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ unsigned int poll_address_hi:16;
+ unsigned int reserved6:16;
+ } bitfields6;
+ struct {
+ unsigned int register_write_addr:16;
+ unsigned int reserved7:16;
+ } bitfields7;
+ unsigned int ordinal4;
+ };
+
+ unsigned int reference;
+
+ unsigned int mask;
+
+ union {
+ struct {
+ unsigned int poll_interval:16;
+ unsigned int reserved8:16;
+ } bitfields8;
+ unsigned int ordinal7;
+ };
+
+};
+#endif
+
+
+#endif /* KFD_PM4_HEADERS_DIQ_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index b6f838f56589..cb79046e5c80 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -128,6 +128,7 @@ struct kfd_device_info {
unsigned int asic_family;
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
+ unsigned int max_no_of_hqd;
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
@@ -167,8 +168,8 @@ struct kfd_dev {
const struct kfd2kgd_calls *kfd2kgd;
struct mutex doorbell_mutex;
- unsigned long doorbell_available_index[DIV_ROUND_UP(
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
+ DECLARE_BITMAP(doorbell_available_index,
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
void *gtt_mem;
uint64_t gtt_start_gpu_addr;
@@ -195,6 +196,9 @@ struct kfd_dev {
* from the HW ring into a SW ring.
*/
bool interrupts_active;
+
+ /* Debug manager */
+ struct kfd_dbgmgr *dbgmgr;
};
/* KGD2KFD callbacks */
@@ -231,6 +235,7 @@ struct device *kfd_chardev(void);
enum kfd_preempt_type_filter {
KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,
+ KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES,
KFD_PREEMPT_TYPE_FILTER_BY_PASID
};
@@ -503,8 +508,6 @@ struct kfd_process {
/* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
struct kfd_queue **queues;
- unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
-
/*Is the user space process 32 bit?*/
bool is_32bit_user_mode;
@@ -516,6 +519,11 @@ struct kfd_process {
event_pages */
u32 next_nonsignal_event_id;
size_t signal_event_count;
+ /*
+ * This flag tells if we should reset all wavefronts on
+ * process termination
+ */
+ bool reset_wavefronts;
};
/**
@@ -650,6 +658,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
+struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
+ unsigned int qid);
+
+int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
+ unsigned int fence_value,
+ unsigned long timeout);
/* Packet Manager */
@@ -717,4 +731,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
+int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index dc910af2bb3c..56b904f5bdb1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -31,6 +31,7 @@
struct mm_struct;
#include "kfd_priv.h"
+#include "kfd_dbgmgr.h"
/*
* Initial size for the array of queues.
@@ -172,6 +173,9 @@ static void kfd_process_wq_release(struct work_struct *work)
pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
pdd->dev->id, p->pasid);
+ if (p->reset_wavefronts)
+ dbgdev_wave_reset_wavefronts(pdd->dev, p);
+
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
list_del(&pdd->per_device_list);
@@ -301,6 +305,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (kfd_init_apertures(process) != 0)
goto err_init_apretures;
+ process->reset_wavefronts = false;
+
return process;
err_init_apretures:
@@ -399,7 +405,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
mutex_lock(&p->mutex);
+ if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
+ kfd_dbgmgr_destroy(dev->dbgmgr);
+
pqm_uninit(&p->pqm);
+ if (p->reset_wavefronts)
+ dbgdev_wave_reset_wavefronts(dev, p);
pdd = kfd_get_process_device_data(dev, p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 530b82c4e78b..7b69070f7ecc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -158,6 +158,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct queue *q;
struct process_queue_node *pqn;
struct kernel_queue *kq;
+ int num_queues = 0;
+ struct queue *cur;
BUG_ON(!pqm || !dev || !properties || !qid);
@@ -172,6 +174,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
return -1;
}
+ /*
+ * for debug process, verify that it is within the static queues limit
+ * currently limit is set to half of the total avail HQD slots
+ * If we are just about to create DIQ, the is_debug flag is not set yet
+ * Hence we also check the type as well
+ */
+ if ((pdd->qpd.is_debug) ||
+ (type == KFD_QUEUE_TYPE_DIQ)) {
+ list_for_each_entry(cur, &pdd->qpd.queues_list, list)
+ num_queues++;
+ if (num_queues >= dev->device_info->max_no_of_hqd/2)
+ return (-ENOSPC);
+ }
+
retval = find_available_queue_slot(pqm, qid);
if (retval != 0)
return retval;
@@ -341,7 +357,7 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
return 0;
}
-static __attribute__((unused)) struct kernel_queue *pqm_get_kernel_queue(
+struct kernel_queue *pqm_get_kernel_queue(
struct process_queue_manager *pqm,
unsigned int qid)
{
OpenPOWER on IntegriCloud