summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c94
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c22
-rw-r--r--include/uapi/linux/kfd_ioctl.h27
3 files changed, 128 insertions, 15 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6fe24964540b..7d4009418ec3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -825,6 +825,97 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
return 0;
}
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_get_process_apertures_new_args *args = data;
+ struct kfd_process_device_apertures *pa;
+ struct kfd_process_device *pdd;
+ uint32_t nodes = 0;
+ int ret;
+
+ dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+ if (args->num_of_nodes == 0) {
+ /* Return number of nodes, so that user space can alloacate
+ * sufficient memory
+ */
+ mutex_lock(&p->mutex);
+
+ if (!kfd_has_process_device_data(p))
+ goto out_unlock;
+
+ /* Run over all pdd of the process */
+ pdd = kfd_get_first_process_device_data(p);
+ do {
+ args->num_of_nodes++;
+ pdd = kfd_get_next_process_device_data(p, pdd);
+ } while (pdd);
+
+ goto out_unlock;
+ }
+
+ /* Fill in process-aperture information for all available
+ * nodes, but not more than args->num_of_nodes as that is
+ * the amount of memory allocated by user
+ */
+ pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+ args->num_of_nodes), GFP_KERNEL);
+ if (!pa)
+ return -ENOMEM;
+
+ mutex_lock(&p->mutex);
+
+ if (!kfd_has_process_device_data(p)) {
+ args->num_of_nodes = 0;
+ kfree(pa);
+ goto out_unlock;
+ }
+
+ /* Run over all pdd of the process */
+ pdd = kfd_get_first_process_device_data(p);
+ do {
+ pa[nodes].gpu_id = pdd->dev->id;
+ pa[nodes].lds_base = pdd->lds_base;
+ pa[nodes].lds_limit = pdd->lds_limit;
+ pa[nodes].gpuvm_base = pdd->gpuvm_base;
+ pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+ pa[nodes].scratch_base = pdd->scratch_base;
+ pa[nodes].scratch_limit = pdd->scratch_limit;
+
+ dev_dbg(kfd_device,
+ "gpu id %u\n", pdd->dev->id);
+ dev_dbg(kfd_device,
+ "lds_base %llX\n", pdd->lds_base);
+ dev_dbg(kfd_device,
+ "lds_limit %llX\n", pdd->lds_limit);
+ dev_dbg(kfd_device,
+ "gpuvm_base %llX\n", pdd->gpuvm_base);
+ dev_dbg(kfd_device,
+ "gpuvm_limit %llX\n", pdd->gpuvm_limit);
+ dev_dbg(kfd_device,
+ "scratch_base %llX\n", pdd->scratch_base);
+ dev_dbg(kfd_device,
+ "scratch_limit %llX\n", pdd->scratch_limit);
+ nodes++;
+
+ pdd = kfd_get_next_process_device_data(p, pdd);
+ } while (pdd && (nodes < args->num_of_nodes));
+ mutex_unlock(&p->mutex);
+
+ args->num_of_nodes = nodes;
+ ret = copy_to_user(
+ (void __user *)args->kfd_process_device_apertures_ptr,
+ pa,
+ (nodes * sizeof(struct kfd_process_device_apertures)));
+ kfree(pa);
+ return ret ? -EFAULT : 0;
+
+out_unlock:
+ mutex_unlock(&p->mutex);
+ return 0;
+}
+
static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
void *data)
{
@@ -1017,6 +1108,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
kfd_ioctl_set_trap_handler, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
+ kfd_ioctl_get_process_apertures_new, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 7377513050e6..a06b0100af96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -282,14 +282,14 @@
(((uint64_t)(base) & \
0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL)
-#define MAKE_SCRATCH_APP_BASE(gpu_num) \
- (((uint64_t)(gpu_num) << 61) + 0x100000000L)
+#define MAKE_SCRATCH_APP_BASE() \
+ (((uint64_t)(0x1UL) << 61) + 0x100000000L)
#define MAKE_SCRATCH_APP_LIMIT(base) \
(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
-#define MAKE_LDS_APP_BASE(gpu_num) \
- (((uint64_t)(gpu_num) << 61) + 0x0)
+#define MAKE_LDS_APP_BASE() \
+ (((uint64_t)(0x1UL) << 61) + 0x0)
#define MAKE_LDS_APP_LIMIT(base) \
(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
@@ -314,7 +314,7 @@ int kfd_init_apertures(struct kfd_process *process)
return -1;
}
/*
- * For 64 bit process aperture will be statically reserved in
+ * For 64 bit process apertures will be statically reserved in
* the x86_64 non canonical process address space
* amdkfd doesn't currently support apertures for 32 bit process
*/
@@ -323,12 +323,11 @@ int kfd_init_apertures(struct kfd_process *process)
pdd->gpuvm_base = pdd->gpuvm_limit = 0;
pdd->scratch_base = pdd->scratch_limit = 0;
} else {
- /*
- * node id couldn't be 0 - the three MSB bits of
- * aperture shoudn't be 0
+ /* Same LDS and scratch apertures can be used
+ * on all GPUs. This allows using more dGPUs
+ * than placement options for apertures.
*/
- pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
-
+ pdd->lds_base = MAKE_LDS_APP_BASE();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
@@ -336,8 +335,7 @@ int kfd_init_apertures(struct kfd_process *process)
pdd->gpuvm_limit =
MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
- pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
-
+ pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
pdd->scratch_limit =
MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
}
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 111d73ba2d96..52014370e2e5 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -107,8 +107,6 @@ struct kfd_ioctl_get_clock_counters_args {
__u32 pad;
};
-#define NUM_OF_SUPPORTED_GPUS 7
-
struct kfd_process_device_apertures {
__u64 lds_base; /* from KFD */
__u64 lds_limit; /* from KFD */
@@ -120,6 +118,12 @@ struct kfd_process_device_apertures {
__u32 pad;
};
+/*
+ * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use
+ * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an
+ * unlimited number of GPUs.
+ */
+#define NUM_OF_SUPPORTED_GPUS 7
struct kfd_ioctl_get_process_apertures_args {
struct kfd_process_device_apertures
process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
@@ -129,6 +133,19 @@ struct kfd_ioctl_get_process_apertures_args {
__u32 pad;
};
+struct kfd_ioctl_get_process_apertures_new_args {
+ /* User allocated. Pointer to struct kfd_process_device_apertures
+ * filled in by Kernel
+ */
+ __u64 kfd_process_device_apertures_ptr;
+ /* to KFD - indicates amount of memory present in
+ * kfd_process_device_apertures_ptr
+ * from KFD - Number of entries filled by KFD.
+ */
+ __u32 num_of_nodes;
+ __u32 pad;
+};
+
#define MAX_ALLOWED_NUM_POINTS 100
#define MAX_ALLOWED_AW_BUFF_SIZE 4096
#define MAX_ALLOWED_WAC_BUFF_SIZE 128
@@ -332,7 +349,11 @@ struct kfd_ioctl_set_trap_handler_args {
#define AMDKFD_IOC_SET_TRAP_HANDLER \
AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
+#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
+ AMDKFD_IOWR(0x14, \
+ struct kfd_ioctl_get_process_apertures_new_args)
+
#define AMDKFD_COMMAND_START 0x01
-#define AMDKFD_COMMAND_END 0x14
+#define AMDKFD_COMMAND_END 0x15
#endif
OpenPOWER on IntegriCloud