summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile53
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c23
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c88
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c68
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c58
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c122
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c23
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c23
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h46
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c22
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c48
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h1
22 files changed, 407 insertions, 212 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 3858820a0055..fbf0ee5201c3 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -3,7 +3,7 @@
#
config HSA_AMD
- tristate "HSA kernel driver for AMD GPU devices"
+ bool "HSA kernel driver for AMD GPU devices"
depends on DRM_AMDGPU && X86_64
imply AMD_IOMMU_V2
select MMU_NOTIFIER
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index ffd096fffc1c..69ec96998bb9 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -23,26 +23,41 @@
# Makefile for Heterogenous System Architecture support for AMD GPU devices
#
-ccflags-y := -Idrivers/gpu/drm/amd/include/ \
- -Idrivers/gpu/drm/amd/include/asic_reg
-
-amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
- kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
- kfd_process.o kfd_queue.o kfd_mqd_manager.o \
- kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
- kfd_mqd_manager_v9.o \
- kfd_kernel_queue.o kfd_kernel_queue_cik.o \
- kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
- kfd_packet_manager.o kfd_process_queue_manager.o \
- kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \
- kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \
- kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
- kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
+AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
+ $(AMDKFD_PATH)/kfd_device.o \
+ $(AMDKFD_PATH)/kfd_chardev.o \
+ $(AMDKFD_PATH)/kfd_topology.o \
+ $(AMDKFD_PATH)/kfd_pasid.o \
+ $(AMDKFD_PATH)/kfd_doorbell.o \
+ $(AMDKFD_PATH)/kfd_flat_memory.o \
+ $(AMDKFD_PATH)/kfd_process.o \
+ $(AMDKFD_PATH)/kfd_queue.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_cik.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_vi.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
+ $(AMDKFD_PATH)/kfd_kernel_queue.o \
+ $(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
+ $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
+ $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
+ $(AMDKFD_PATH)/kfd_packet_manager.o \
+ $(AMDKFD_PATH)/kfd_process_queue_manager.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_vi.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_v9.o \
+ $(AMDKFD_PATH)/kfd_interrupt.o \
+ $(AMDKFD_PATH)/kfd_events.o \
+ $(AMDKFD_PATH)/cik_event_interrupt.o \
+ $(AMDKFD_PATH)/kfd_int_process_v9.o \
+ $(AMDKFD_PATH)/kfd_dbgdev.o \
+ $(AMDKFD_PATH)/kfd_dbgmgr.o \
+ $(AMDKFD_PATH)/kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)
-amdkfd-y += kfd_iommu.o
+AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
endif
-amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
-
-obj-$(CONFIG_HSA_AMD) += amdkfd.o
+ifneq ($(CONFIG_DEBUG_FS),)
+AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
+endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 297b36c26a05..14d5b5fa822d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -447,6 +447,24 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
return retval;
}
+static int kfd_ioctl_get_queue_wave_state(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_get_queue_wave_state_args *args = data;
+ int r;
+
+ mutex_lock(&p->mutex);
+
+ r = pqm_get_wave_state(&p->pqm, args->queue_id,
+ (void __user *)args->ctl_stack_address,
+ &args->ctl_stack_used_size,
+ &args->save_area_used_size);
+
+ mutex_unlock(&p->mutex);
+
+ return r;
+}
+
static int kfd_ioctl_set_memory_policy(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -1210,7 +1228,7 @@ err_unlock:
return ret;
}
-static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
struct kfd_local_mem_info mem_info;
@@ -1615,6 +1633,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
kfd_ioctl_set_cu_mask, 0),
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
+ kfd_ioctl_get_queue_wave_state, 0)
+
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index ee4996029a86..56412b0e7e1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -346,15 +346,15 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
struct list_head *device_list)
{
struct kfd_iolink_properties *props = NULL, *props2;
- struct kfd_topology_device *dev, *cpu_dev;
+ struct kfd_topology_device *dev, *to_dev;
uint32_t id_from;
uint32_t id_to;
id_from = iolink->proximity_domain_from;
id_to = iolink->proximity_domain_to;
- pr_debug("Found IO link entry in CRAT table with id_from=%d\n",
- id_from);
+ pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
+ id_from, id_to);
list_for_each_entry(dev, device_list, list) {
if (id_from == dev->proximity_domain) {
props = kfd_alloc_struct(props);
@@ -369,6 +369,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
props->weight = 20;
+ else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
+ props->weight = 15;
else
props->weight = node_distance(id_from, id_to);
@@ -389,20 +391,23 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
/* CPU topology is created before GPUs are detected, so CPU->GPU
* links are not built at that time. If a PCIe type is discovered, it
* means a GPU is detected and we are adding GPU->CPU to the topology.
- * At this time, also add the corresponded CPU->GPU link.
+ * At this time, also add the corresponded CPU->GPU link if GPU
+ * is large bar.
+ * For xGMI, we only added the link with one direction in the crat
+ * table, add corresponded reversed direction link now.
*/
- if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) {
- cpu_dev = kfd_topology_device_by_proximity_domain(id_to);
- if (!cpu_dev)
+ if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
+ to_dev = kfd_topology_device_by_proximity_domain(id_to);
+ if (!to_dev)
return -ENODEV;
/* same everything but the other direction */
props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
- cpu_dev->io_link_count++;
- cpu_dev->node_props.io_links_count++;
- list_add_tail(&props2->list, &cpu_dev->io_link_props);
+ to_dev->io_link_count++;
+ to_dev->node_props.io_links_count++;
+ list_add_tail(&props2->list, &to_dev->io_link_props);
}
return 0;
@@ -642,6 +647,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
break;
case CHIP_VEGA10:
+ case CHIP_VEGA20:
pcache_info = vega10_cache_info;
num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
break;
@@ -1037,7 +1043,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
*
* Return 0 if successful else return -ve value
*/
-static int kfd_fill_gpu_direct_io_link(int *avail_size,
+static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
struct kfd_dev *kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain)
@@ -1052,6 +1058,8 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size,
sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+ if (kfd_dev_is_large_bar(kdev))
+ sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
/* Fill in IOLINK subtype.
* TODO: Fill-in other fields of iolink subtype
@@ -1069,6 +1077,29 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size,
return 0;
}
+static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
+ struct kfd_dev *kdev,
+ struct crat_subtype_iolink *sub_type_hdr,
+ uint32_t proximity_domain_from,
+ uint32_t proximity_domain_to)
+{
+ *avail_size -= sizeof(struct crat_subtype_iolink);
+ if (*avail_size < 0)
+ return -ENOMEM;
+
+ memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+ sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+ sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
+ CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
+
+ sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
+ sub_type_hdr->proximity_domain_from = proximity_domain_from;
+ sub_type_hdr->proximity_domain_to = proximity_domain_to;
+ return 0;
+}
+
/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU
*
* @pcrat_image: Fill in VCRAT for GPU
@@ -1081,14 +1112,16 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
{
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
struct crat_subtype_generic *sub_type_hdr;
+ struct kfd_local_mem_info local_mem_info;
+ struct kfd_topology_device *peer_dev;
struct crat_subtype_computeunit *cu;
struct kfd_cu_info cu_info;
int avail_size = *size;
uint32_t total_num_of_cu;
int num_of_cache_entries = 0;
int cache_mem_filled = 0;
+ uint32_t nid = 0;
int ret = 0;
- struct kfd_local_mem_info local_mem_info;
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
return -EINVAL;
@@ -1212,7 +1245,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
*/
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
cache_mem_filled);
- ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev,
+ ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
if (ret < 0)
@@ -1221,6 +1254,35 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
crat_table->length += sub_type_hdr->length;
crat_table->total_entries++;
+
+ /* Fill in Subtype: IO_LINKS
+ * Direct links from GPU to other GPUs through xGMI.
+ * We will loop GPUs that already be processed (with lower value
+ * of proximity_domain), add the link for the GPUs with same
+ * hive id (from this GPU to other GPU) . The reversed iolink
+ * (from other GPU to this GPU) will be added
+ * in kfd_parse_subtype_iolink.
+ */
+ if (kdev->hive_id) {
+ for (nid = 0; nid < proximity_domain; ++nid) {
+ peer_dev = kfd_topology_device_by_proximity_domain(nid);
+ if (!peer_dev->gpu)
+ continue;
+ if (peer_dev->gpu->hive_id != kdev->hive_id)
+ continue;
+ sub_type_hdr = (typeof(sub_type_hdr))(
+ (char *)sub_type_hdr +
+ sizeof(struct crat_subtype_iolink));
+ ret = kfd_fill_gpu_xgmi_link_to_gpu(
+ &avail_size, kdev,
+ (struct crat_subtype_iolink *)sub_type_hdr,
+ proximity_domain, nid);
+ if (ret < 0)
+ return ret;
+ crat_table->length += sub_type_hdr->length;
+ crat_table->total_entries++;
+ }
+ }
*size = crat_table->length;
pr_info("Virtual CRAT table created for GPU\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index b5cd182b9edd..7c3f192fe25f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -232,7 +232,8 @@ struct crat_subtype_ccompute {
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2)
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3)
#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4)
-#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0
+#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL (1 << 31)
+#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0x7fffffe0
/*
* IO interface types
@@ -248,7 +249,12 @@ struct crat_subtype_ccompute {
#define CRAT_IOLINK_TYPE_RAPID_IO 8
#define CRAT_IOLINK_TYPE_INFINIBAND 9
#define CRAT_IOLINK_TYPE_RESERVED3 10
-#define CRAT_IOLINK_TYPE_OTHER 11
+#define CRAT_IOLINK_TYPE_XGMI 11
+#define CRAT_IOLINK_TYPE_XGOP 12
+#define CRAT_IOLINK_TYPE_GZ 13
+#define CRAT_IOLINK_TYPE_ETHERNET_RDMA 14
+#define CRAT_IOLINK_TYPE_RDMA_OTHER 15
+#define CRAT_IOLINK_TYPE_OTHER 16
#define CRAT_IOLINK_TYPE_MAX 255
#define CRAT_IOLINK_RESERVED_LENGTH 24
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 29ac74f40dce..a9f18ea7e354 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -53,6 +53,7 @@ static const struct kfd_device_info kaveri_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info carrizo_device_info = {
@@ -69,6 +70,7 @@ static const struct kfd_device_info carrizo_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info raven_device_info = {
@@ -84,6 +86,7 @@ static const struct kfd_device_info raven_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = true,
.num_sdma_engines = 1,
+ .num_sdma_queues_per_engine = 2,
};
#endif
@@ -101,6 +104,7 @@ static const struct kfd_device_info hawaii_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info tonga_device_info = {
@@ -116,21 +120,7 @@ static const struct kfd_device_info tonga_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
-};
-
-static const struct kfd_device_info tonga_vf_device_info = {
- .asic_family = CHIP_TONGA,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info fiji_device_info = {
@@ -146,6 +136,7 @@ static const struct kfd_device_info fiji_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info fiji_vf_device_info = {
@@ -161,6 +152,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
@@ -177,6 +169,7 @@ static const struct kfd_device_info polaris10_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info polaris10_vf_device_info = {
@@ -192,6 +185,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info polaris11_device_info = {
@@ -207,6 +201,7 @@ static const struct kfd_device_info polaris11_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info vega10_device_info = {
@@ -222,6 +217,7 @@ static const struct kfd_device_info vega10_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info vega10_vf_device_info = {
@@ -237,8 +233,24 @@ static const struct kfd_device_info vega10_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 2,
};
+static const struct kfd_device_info vega20_device_info = {
+ .asic_family = CHIP_VEGA20,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
+ .num_sdma_queues_per_engine = 8,
+};
struct kfd_deviceid {
unsigned short did;
@@ -293,7 +305,6 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x6928, &tonga_device_info }, /* Tonga */
{ 0x6929, &tonga_device_info }, /* Tonga */
{ 0x692B, &tonga_device_info }, /* Tonga */
- { 0x692F, &tonga_vf_device_info }, /* Tonga vf */
{ 0x6938, &tonga_device_info }, /* Tonga */
{ 0x6939, &tonga_device_info }, /* Tonga */
{ 0x7300, &fiji_device_info }, /* Fiji */
@@ -328,6 +339,12 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x6868, &vega10_device_info }, /* Vega10 */
{ 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/
{ 0x687F, &vega10_device_info }, /* Vega10 */
+ { 0x66a0, &vega20_device_info }, /* Vega20 */
+ { 0x66a1, &vega20_device_info }, /* Vega20 */
+ { 0x66a2, &vega20_device_info }, /* Vega20 */
+ { 0x66a3, &vega20_device_info }, /* Vega20 */
+ { 0x66a7, &vega20_device_info }, /* Vega20 */
+ { 0x66af, &vega20_device_info } /* Vega20 */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -366,6 +383,10 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
return NULL;
}
+ kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
+ if (!kfd)
+ return NULL;
+
/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
* 32 and 64-bit requests are possible and must be
* supported.
@@ -377,12 +398,10 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
dev_info(kfd_device,
"skipped device %x:%x, PCI rejects atomics\n",
pdev->vendor, pdev->device);
+ kfree(kfd);
return NULL;
- }
-
- kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
- if (!kfd)
- return NULL;
+ } else if (!ret)
+ kfd->pci_atomic_requested = true;
kfd->kgd = kgd;
kfd->device_info = device_info;
@@ -419,6 +438,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
{
unsigned int size;
+ kfd->mec_fw_version = kfd->kfd2kgd->get_fw_version(kfd->kgd,
+ KGD_ENGINE_MEC1);
+ kfd->sdma_fw_version = kfd->kfd2kgd->get_fw_version(kfd->kgd,
+ KGD_ENGINE_SDMA1);
kfd->shared_resources = *gpu_resources;
kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
@@ -477,6 +500,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_doorbell_error;
}
+ if (kfd->kfd2kgd->get_hive_id)
+ kfd->hive_id = kfd->kfd2kgd->get_hive_id(kfd->kgd);
+
if (kfd_topology_add_device(kfd)) {
dev_err(kfd_device, "Error adding device to topology\n");
goto kfd_topology_add_device_error;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 4f22e745df51..a3b933967171 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -109,7 +109,7 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
return dqm->dev->device_info->num_sdma_engines
- * KFD_SDMA_QUEUES_PER_ENGINE;
+ * dqm->dev->device_info->num_sdma_queues_per_engine;
}
void program_sh_mem_settings(struct device_queue_manager *dqm,
@@ -667,7 +667,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
struct queue *q;
struct mqd_manager *mqd_mgr;
struct kfd_process_device *pdd;
- uint32_t pd_base;
+ uint64_t pd_base;
int retval = 0;
pdd = qpd_to_pdd(qpd);
@@ -687,7 +687,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
/* Update PD Base in QPD */
qpd->page_table_base = pd_base;
- pr_debug("Updated PD address to 0x%08x\n", pd_base);
+ pr_debug("Updated PD address to 0x%llx\n", pd_base);
if (!list_empty(&qpd->queues_list)) {
dqm->dev->kfd2kgd->set_vm_context_page_table_base(
@@ -738,7 +738,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
{
struct queue *q;
struct kfd_process_device *pdd;
- uint32_t pd_base;
+ uint64_t pd_base;
int retval = 0;
pdd = qpd_to_pdd(qpd);
@@ -758,7 +758,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
/* Update PD Base in QPD */
qpd->page_table_base = pd_base;
- pr_debug("Updated PD address to 0x%08x\n", pd_base);
+ pr_debug("Updated PD address to 0x%llx\n", pd_base);
/* activate all active queues on the qpd */
list_for_each_entry(q, &qpd->queues_list, list) {
@@ -782,7 +782,7 @@ static int register_process(struct device_queue_manager *dqm,
{
struct device_process_node *n;
struct kfd_process_device *pdd;
- uint32_t pd_base;
+ uint64_t pd_base;
int retval;
n = kzalloc(sizeof(*n), GFP_KERNEL);
@@ -800,6 +800,7 @@ static int register_process(struct device_queue_manager *dqm,
/* Update PD Base in QPD */
qpd->page_table_base = pd_base;
+ pr_debug("Updated PD address to 0x%llx\n", pd_base);
retval = dqm->asic_ops.update_qpd(dqm, qpd);
@@ -1363,9 +1364,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
{
int retval;
struct mqd_manager *mqd_mgr;
- bool preempt_all_queues;
-
- preempt_all_queues = false;
retval = 0;
@@ -1549,6 +1547,41 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
return retval;
}
+static int get_wave_state(struct device_queue_manager *dqm,
+ struct queue *q,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size)
+{
+ struct mqd_manager *mqd;
+ int r;
+
+ dqm_lock(dqm);
+
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.is_active || !q->device->cwsr_enabled) {
+ r = -EINVAL;
+ goto dqm_unlock;
+ }
+
+ mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
+ if (!mqd) {
+ r = -ENOMEM;
+ goto dqm_unlock;
+ }
+
+ if (!mqd->get_wave_state) {
+ r = -EINVAL;
+ goto dqm_unlock;
+ }
+
+ r = mqd->get_wave_state(mqd, q->mqd, ctl_stack, ctl_stack_used_size,
+ save_area_used_size);
+
+dqm_unlock:
+ dqm_unlock(dqm);
+ return r;
+}
static int process_termination_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
@@ -1670,6 +1703,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.process_termination = process_termination_cpsch;
dqm->ops.evict_process_queues = evict_process_queues_cpsch;
dqm->ops.restore_process_queues = restore_process_queues_cpsch;
+ dqm->ops.get_wave_state = get_wave_state;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
@@ -1689,6 +1723,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
dqm->ops.restore_process_queues =
restore_process_queues_nocpsch;
+ dqm->ops.get_wave_state = get_wave_state;
break;
default:
pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
@@ -1716,6 +1751,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
break;
case CHIP_VEGA10:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
device_queue_manager_init_v9(&dqm->asic_ops);
break;
@@ -1827,7 +1863,9 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
}
for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
- for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) {
+ for (queue = 0;
+ queue < dqm->dev->device_info->num_sdma_queues_per_engine;
+ queue++) {
r = dqm->dev->kfd2kgd->hqd_sdma_dump(
dqm->dev->kgd, pipe, queue, &dump, &n_regs);
if (r)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 00da3169a004..70e38a2e23b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -33,7 +33,6 @@
#define KFD_UNMAP_LATENCY_MS (4000)
#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000)
-#define KFD_SDMA_QUEUES_PER_ENGINE (2)
struct device_process_node {
struct qcm_process_device *qpd;
@@ -82,6 +81,8 @@ struct device_process_node {
*
* @restore_process_queues: Restore all evicted queues queues of a process
*
+ * @get_wave_state: Retrieves context save state and optionally copies the
+ * control stack, if kept in the MQD, to the given userspace address.
*/
struct device_queue_manager_ops {
@@ -137,6 +138,12 @@ struct device_queue_manager_ops {
struct qcm_process_device *qpd);
int (*restore_process_queues)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
+
+ int (*get_wave_state)(struct device_queue_manager *dqm,
+ struct queue *q,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size);
};
struct device_queue_manager_asic_ops {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 97d5423c5673..3d66cec414af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -400,6 +400,7 @@ int kfd_init_apertures(struct kfd_process *process)
kfd_init_apertures_vi(pdd, id);
break;
case CHIP_VEGA10:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
kfd_init_apertures_v9(pdd, id);
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 9f84b4d9fb88..6c31f7370193 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -322,6 +322,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
break;
case CHIP_VEGA10:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
kernel_queue_init_v9(&kq->ops_asic_specific);
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 684a3bf07efd..33830b1a5a54 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -71,8 +71,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
uint32_t *buffer, struct qcm_process_device *qpd)
{
struct pm4_mes_map_process *packet;
- uint64_t vm_page_table_base_addr =
- (uint64_t)(qpd->page_table_base) << 12;
+ uint64_t vm_page_table_base_addr = qpd->page_table_base;
packet = (struct pm4_mes_map_process *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 6e1f5c7c2d4b..8018163414ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -20,21 +20,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/moduleparam.h>
#include <linux/device.h>
-#include <linux/printk.h>
#include "kfd_priv.h"
-#define KFD_DRIVER_AUTHOR "AMD Inc. and others"
-
-#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
-#define KFD_DRIVER_DATE "20150421"
-#define KFD_DRIVER_MAJOR 0
-#define KFD_DRIVER_MINOR 7
-#define KFD_DRIVER_PATCHLEVEL 2
-
static const struct kgd2kfd_calls kgd2kfd = {
.exit = kgd2kfd_exit,
.probe = kgd2kfd_probe,
@@ -51,77 +40,7 @@ static const struct kgd2kfd_calls kgd2kfd = {
.post_reset = kgd2kfd_post_reset,
};
-int sched_policy = KFD_SCHED_POLICY_HWS;
-module_param(sched_policy, int, 0444);
-MODULE_PARM_DESC(sched_policy,
- "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
-
-int hws_max_conc_proc = 8;
-module_param(hws_max_conc_proc, int, 0444);
-MODULE_PARM_DESC(hws_max_conc_proc,
- "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
-
-int cwsr_enable = 1;
-module_param(cwsr_enable, int, 0444);
-MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = off, 1 = on (default))");
-
-int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
-module_param(max_num_of_queues_per_device, int, 0444);
-MODULE_PARM_DESC(max_num_of_queues_per_device,
- "Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
-
-int send_sigterm;
-module_param(send_sigterm, int, 0444);
-MODULE_PARM_DESC(send_sigterm,
- "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
-
-int debug_largebar;
-module_param(debug_largebar, int, 0444);
-MODULE_PARM_DESC(debug_largebar,
- "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
-
-int ignore_crat;
-module_param(ignore_crat, int, 0444);
-MODULE_PARM_DESC(ignore_crat,
- "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
-
-int noretry;
-module_param(noretry, int, 0644);
-MODULE_PARM_DESC(noretry,
- "Set sh_mem_config.retry_disable on GFXv9+ dGPUs (0 = retry enabled (default), 1 = retry disabled)");
-
-int halt_if_hws_hang;
-module_param(halt_if_hws_hang, int, 0644);
-MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
-
-
-static int amdkfd_init_completed;
-
-
-int kgd2kfd_init(unsigned int interface_version,
- const struct kgd2kfd_calls **g2f)
-{
- if (!amdkfd_init_completed)
- return -EPROBE_DEFER;
-
- /*
- * Only one interface version is supported,
- * no kfd/kgd version skew allowed.
- */
- if (interface_version != KFD_INTERFACE_VERSION)
- return -EINVAL;
-
- *g2f = &kgd2kfd;
-
- return 0;
-}
-EXPORT_SYMBOL(kgd2kfd_init);
-
-void kgd2kfd_exit(void)
-{
-}
-
-static int __init kfd_module_init(void)
+static int kfd_init(void)
{
int err;
@@ -129,7 +48,7 @@ static int __init kfd_module_init(void)
if ((sched_policy < KFD_SCHED_POLICY_HWS) ||
(sched_policy > KFD_SCHED_POLICY_NO_HWS)) {
pr_err("sched_policy has invalid value\n");
- return -1;
+ return -EINVAL;
}
/* Verify module parameters */
@@ -137,7 +56,7 @@ static int __init kfd_module_init(void)
(max_num_of_queues_per_device >
KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
pr_err("max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
- return -1;
+ return -EINVAL;
}
err = kfd_chardev_init();
@@ -154,10 +73,6 @@ static int __init kfd_module_init(void)
kfd_debugfs_init();
- amdkfd_init_completed = 1;
-
- dev_info(kfd_device, "Initialized module\n");
-
return 0;
err_create_wq:
@@ -168,23 +83,30 @@ err_ioctl:
return err;
}
-static void __exit kfd_module_exit(void)
+static void kfd_exit(void)
{
- amdkfd_init_completed = 0;
-
kfd_debugfs_fini();
kfd_process_destroy_wq();
kfd_topology_shutdown();
kfd_chardev_exit();
- pr_info("amdkfd: Removed module\n");
}
-module_init(kfd_module_init);
-module_exit(kfd_module_exit);
+int kgd2kfd_init(unsigned int interface_version,
+ const struct kgd2kfd_calls **g2f)
+{
+ int err;
+
+ err = kfd_init();
+ if (err)
+ return err;
+
+ *g2f = &kgd2kfd;
+
+ return 0;
+}
+EXPORT_SYMBOL(kgd2kfd_init);
-MODULE_AUTHOR(KFD_DRIVER_AUTHOR);
-MODULE_DESCRIPTION(KFD_DRIVER_DESC);
-MODULE_LICENSE("GPL and additional rights");
-MODULE_VERSION(__stringify(KFD_DRIVER_MAJOR) "."
- __stringify(KFD_DRIVER_MINOR) "."
- __stringify(KFD_DRIVER_PATCHLEVEL));
+void kgd2kfd_exit(void)
+{
+ kfd_exit();
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 3bc25ab84f34..e33019a7a883 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -39,6 +39,7 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
case CHIP_POLARIS11:
return mqd_manager_init_vi_tonga(type, dev);
case CHIP_VEGA10:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
return mqd_manager_init_v9(type, dev);
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 4e84052d4e21..f8261313ae7b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -43,6 +43,9 @@
*
* @is_occupied: Checks if the relevant HQD slot is occupied.
*
+ * @get_wave_state: Retrieves context save state and optionally copies the
+ * control stack, if kept in the MQD, to the given userspace address.
+ *
* @mqd_mutex: Mqd manager mutex.
*
* @dev: The kfd device structure coupled with this module.
@@ -85,6 +88,11 @@ struct mqd_manager {
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id);
+ int (*get_wave_state)(struct mqd_manager *mm, void *mqd,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size);
+
#if defined(CONFIG_DEBUG_FS)
int (*debugfs_show_mqd)(struct seq_file *m, void *data);
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 0cedb37cf513..f381c1cb27bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -266,6 +266,28 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
pipe_id, queue_id);
}
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size)
+{
+ struct v9_mqd *m;
+
+ /* Control stack is located one page after MQD. */
+ void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+
+ m = get_mqd(mqd);
+
+ *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+ m->cp_hqd_cntl_stack_offset;
+ *save_area_used_size = m->cp_hqd_wg_state_offset;
+
+ if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
+ return -EFAULT;
+
+ return 0;
+}
+
static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -435,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+ mqd->get_wave_state = get_wave_state;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index b81fda3754da..6469b3456f00 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -269,6 +269,28 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
pipe_id, queue_id);
}
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size)
+{
+ struct vi_mqd *m;
+
+ m = get_mqd(mqd);
+
+ *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+ m->cp_hqd_cntl_stack_offset;
+ *save_area_used_size = m->cp_hqd_wg_state_offset -
+ m->cp_hqd_cntl_stack_size;
+
+ /* Control stack is not copied to user mode for GFXv8 because
+ * it's part of the context save area that is already
+ * accessible to user mode
+ */
+
+ return 0;
+}
+
static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -436,6 +458,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+ mqd->get_wave_state = get_wave_state;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 1092631765cb..c6080ed3b6a7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -229,6 +229,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
pm->pmf = &kfd_vi_pm_funcs;
break;
case CHIP_VEGA10:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
pm->pmf = &kfd_v9_pm_funcs;
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 92b285ca73aa..53ff86d45d91 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -103,7 +103,6 @@
*/
extern int max_num_of_queues_per_device;
-#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
(KFD_MAX_NUM_OF_PROCESSES * \
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
@@ -149,33 +148,6 @@ extern int noretry;
*/
extern int halt_if_hws_hang;
-/**
- * enum kfd_sched_policy
- *
- * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp)
- * scheduling. In this scheduling mode we're using the firmware code to
- * schedule the user mode queues and kernel queues such as HIQ and DIQ.
- * the HIQ queue is used as a special queue that dispatches the configuration
- * to the cp and the user mode queues list that are currently running.
- * the DIQ queue is a debugging queue that dispatches debugging commands to the
- * firmware.
- * in this scheduling mode user mode queues over subscription feature is
- * enabled.
- *
- * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over
- * subscription feature disabled.
- *
- * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly
- * set the command processor registers and sets the queues "manually". This
- * mode is used *ONLY* for debugging proposes.
- *
- */
-enum kfd_sched_policy {
- KFD_SCHED_POLICY_HWS = 0,
- KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION,
- KFD_SCHED_POLICY_NO_HWS
-};
-
enum cache_policy {
cache_policy_coherent,
cache_policy_noncoherent
@@ -204,6 +176,7 @@ struct kfd_device_info {
bool needs_iommu_device;
bool needs_pci_atomics;
unsigned int num_sdma_engines;
+ unsigned int num_sdma_queues_per_engine;
};
struct kfd_mem_obj {
@@ -275,6 +248,10 @@ struct kfd_dev {
/* Debug manager */
struct kfd_dbgmgr *dbgmgr;
+ /* Firmware versions */
+ uint16_t mec_fw_version;
+ uint16_t sdma_fw_version;
+
/* Maximum process number mapped to HW scheduler */
unsigned int max_proc_per_quantum;
@@ -282,6 +259,11 @@ struct kfd_dev {
bool cwsr_enabled;
const void *cwsr_isa;
unsigned int cwsr_isa_size;
+
+ /* xGMI */
+ uint64_t hive_id;
+
+ bool pci_atomic_requested;
};
/* KGD2KFD callbacks */
@@ -525,11 +507,11 @@ struct qcm_process_device {
* All the memory management data should be here too
*/
uint64_t gds_context_area;
+ uint64_t page_table_base;
uint32_t sh_mem_config;
uint32_t sh_mem_bases;
uint32_t sh_mem_ape1_base;
uint32_t sh_mem_ape1_limit;
- uint32_t page_table_base;
uint32_t gds_size;
uint32_t num_gws;
uint32_t num_oac;
@@ -721,6 +703,7 @@ struct amdkfd_ioctl_desc {
unsigned int cmd_drv;
const char *name;
};
+bool kfd_dev_is_large_bar(struct kfd_dev *dev);
int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
@@ -880,6 +863,11 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
unsigned int qid);
+int pqm_get_wave_state(struct process_queue_manager *pqm,
+ unsigned int qid,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size);
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
unsigned int fence_value,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 4694386cc623..0039e451d9af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -322,8 +322,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
pdd->dev->id, p->pasid);
- if (pdd->drm_file)
+ if (pdd->drm_file) {
+ pdd->dev->kfd2kgd->release_process_vm(pdd->dev->kgd, pdd->vm);
fput(pdd->drm_file);
+ }
else if (pdd->vm)
pdd->dev->kfd2kgd->destroy_process_vm(
pdd->dev->kgd, pdd->vm);
@@ -687,11 +689,11 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (drm_file)
ret = dev->kfd2kgd->acquire_process_vm(
- dev->kgd, drm_file,
+ dev->kgd, drm_file, p->pasid,
&pdd->vm, &p->kgd_process_info, &p->ef);
else
ret = dev->kfd2kgd->create_process_vm(
- dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef);
+ dev->kgd, p->pasid, &pdd->vm, &p->kgd_process_info, &p->ef);
if (ret) {
pr_err("Failed to create process VM object\n");
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index c8cad9c078ae..fcaaf93681ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -408,6 +408,28 @@ struct kernel_queue *pqm_get_kernel_queue(
return NULL;
}
+int pqm_get_wave_state(struct process_queue_manager *pqm,
+ unsigned int qid,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size)
+{
+ struct process_queue_node *pqn;
+
+ pqn = get_queue_by_qid(pqm, qid);
+ if (!pqn) {
+ pr_debug("amdkfd: No queue %d exists for operation\n",
+ qid);
+ return -EFAULT;
+ }
+
+ return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
+ pqn->q,
+ ctl_stack,
+ ctl_stack_used_size,
+ save_area_used_size);
+}
+
#if defined(CONFIG_DEBUG_FS)
int pqm_debugfs_mqds(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 80f5db4ef75f..e3843c5929ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.location_id);
sysfs_show_32bit_prop(buffer, "drm_render_minor",
dev->node_props.drm_render_minor);
+ sysfs_show_64bit_prop(buffer, "hive_id",
+ dev->node_props.hive_id);
if (dev->gpu) {
log_max_watch_addr =
@@ -480,11 +482,11 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
(unsigned long long int) 0);
sysfs_show_32bit_prop(buffer, "fw_version",
- dev->gpu->kfd2kgd->get_fw_version(
- dev->gpu->kgd,
- KGD_ENGINE_MEC1));
+ dev->gpu->mec_fw_version);
sysfs_show_32bit_prop(buffer, "capability",
dev->node_props.capability);
+ sysfs_show_32bit_prop(buffer, "sdma_fw_version",
+ dev->gpu->sdma_fw_version);
}
return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
@@ -1125,17 +1127,40 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
{
- struct kfd_iolink_properties *link;
+ struct kfd_iolink_properties *link, *cpu_link;
+ struct kfd_topology_device *cpu_dev;
+ uint32_t cap;
+ uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
+ uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
if (!dev || !dev->gpu)
return;
- /* GPU only creates direck links so apply flags setting to all */
- if (dev->gpu->device_info->asic_family == CHIP_HAWAII)
- list_for_each_entry(link, &dev->io_link_props, list)
- link->flags = CRAT_IOLINK_FLAGS_ENABLED |
- CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
- CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+ pcie_capability_read_dword(dev->gpu->pdev,
+ PCI_EXP_DEVCAP2, &cap);
+
+ if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
+ cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+
+ if (!dev->gpu->pci_atomic_requested ||
+ dev->gpu->device_info->asic_family == CHIP_HAWAII)
+ flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+
+ /* GPU only creates direct links so apply flags setting to all */
+ list_for_each_entry(link, &dev->io_link_props, list) {
+ link->flags = flag;
+ cpu_dev = kfd_topology_device_by_proximity_domain(
+ link->node_to);
+ if (cpu_dev) {
+ list_for_each_entry(cpu_link,
+ &cpu_dev->io_link_props, list)
+ if (cpu_link->node_to == link->node_from)
+ cpu_link->flags = cpu_flag;
+ }
+ }
}
int kfd_topology_add_device(struct kfd_dev *gpu)
@@ -1230,6 +1255,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.drm_render_minor =
gpu->shared_resources.drm_render_minor;
+ dev->node_props.hive_id = gpu->hive_id;
+
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
@@ -1251,6 +1278,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
break;
case CHIP_VEGA10:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 7d9c3f948dff..92a19be07344 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -49,6 +49,7 @@
#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
struct kfd_node_properties {
+ uint64_t hive_id;
uint32_t cpu_cores_count;
uint32_t simd_count;
uint32_t mem_banks_count;
OpenPOWER on IntegriCloud