diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/Kconfig | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 16 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_module.c | 31 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 44 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 21 |
7 files changed, 64 insertions, 74 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index fbf0ee5201c3..c3613604a4f8 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -4,8 +4,8 @@ config HSA_AMD bool "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && X86_64 - imply AMD_IOMMU_V2 + depends on DRM_AMDGPU && (X86_64 || ARM64) + imply AMD_IOMMU_V2 if X86_64 select MMU_NOTIFIER help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index b7bc7d7d048f..2e7c44955f43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -863,6 +863,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, return 0; } +#ifdef CONFIG_X86_64 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, uint32_t *num_entries, struct crat_subtype_iolink *sub_type_hdr) @@ -905,6 +906,7 @@ static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, return 0; } +#endif /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU * @@ -920,7 +922,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) struct crat_subtype_generic *sub_type_hdr; int avail_size = *size; int numa_node_id; +#ifdef CONFIG_X86_64 uint32_t entries = 0; +#endif int ret = 0; if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU) @@ -982,6 +986,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) sub_type_hdr->length); /* Fill in Subtype: IO Link */ +#ifdef CONFIG_X86_64 ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, &entries, (struct crat_subtype_iolink *)sub_type_hdr); @@ -992,6 +997,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length * entries); +#else + pr_info("IO link not available for non x86 platforms\n"); 
+#endif crat_table->num_domains++; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8372556b52eb..c6c9530e704e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -134,12 +134,18 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) */ q->doorbell_id = q->properties.queue_id; } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - /* For SDMA queues on SOC15, use static doorbell - * assignments based on the engine and queue. + /* For SDMA queues on SOC15 with 8-byte doorbell, use static + * doorbell assignments based on the engine and queue id. + * The doorbell index distance between RLC (2*i) and (2*i+1) + * for a SDMA engine is 512. */ - q->doorbell_id = dev->shared_resources.sdma_doorbell - [q->properties.sdma_engine_id] - [q->properties.sdma_queue_id]; + uint32_t *idx_offset = + dev->shared_resources.sdma_doorbell_idx; + + q->doorbell_id = idx_offset[q->properties.sdma_engine_id] + + (q->properties.sdma_queue_id & 1) + * KFD_QUEUE_DOORBELL_MIRROR_OFFSET + + (q->properties.sdma_queue_id >> 1); } else { /* For CP queues on SOC15 reserve a free doorbell ID */ unsigned int found; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 8018163414ff..932007eb9168 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -23,22 +23,7 @@ #include <linux/sched.h> #include <linux/device.h> #include "kfd_priv.h" - -static const struct kgd2kfd_calls kgd2kfd = { - .exit = kgd2kfd_exit, - .probe = kgd2kfd_probe, - .device_init = kgd2kfd_device_init, - .device_exit = kgd2kfd_device_exit, - .interrupt = kgd2kfd_interrupt, - .suspend = kgd2kfd_suspend, - .resume = kgd2kfd_resume, - .quiesce_mm = kgd2kfd_quiesce_mm, - .resume_mm = kgd2kfd_resume_mm, - .schedule_evict_and_restore_process = - 
kgd2kfd_schedule_evict_and_restore_process, - .pre_reset = kgd2kfd_pre_reset, - .post_reset = kgd2kfd_post_reset, -}; +#include "amdgpu_amdkfd.h" static int kfd_init(void) { @@ -91,20 +76,10 @@ static void kfd_exit(void) kfd_chardev_exit(); } -int kgd2kfd_init(unsigned int interface_version, - const struct kgd2kfd_calls **g2f) +int kgd2kfd_init() { - int err; - - err = kfd_init(); - if (err) - return err; - - *g2f = &kgd2kfd; - - return 0; + return kfd_init(); } -EXPORT_SYMBOL(kgd2kfd_init); void kgd2kfd_exit(void) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0689d4ccbbc0..0eeee3c6d6dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -97,17 +97,29 @@ #define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) #define KFD_CWSR_TMA_OFFSET PAGE_SIZE +#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ + (KFD_MAX_NUM_OF_PROCESSES * \ + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) + +#define KFD_KERNEL_QUEUE_SIZE 2048 + +/* + * 512 = 0x200 + * The doorbell index distance between SDMA RLC (2*i) and (2*i+1) in the + * same SDMA engine on SOC15, which has 8-byte doorbells for SDMA. + * 512 8-byte doorbell distance (i.e. one page away) ensures that SDMA RLC + * (2*i+1) doorbells (in terms of the lower 12 bit address) lie exactly in + * the OFFSET and SIZE set in registers like BIF_SDMA0_DOORBELL_RANGE. 
+ */ +#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512 + + /* * Kernel module parameter to specify maximum number of supported queues per * device */ extern int max_num_of_queues_per_device; -#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ - (KFD_MAX_NUM_OF_PROCESSES * \ - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) - -#define KFD_KERNEL_QUEUE_SIZE 2048 /* Kernel module parameter to specify the scheduling policy */ extern int sched_policy; @@ -266,14 +278,6 @@ struct kfd_dev { bool pci_atomic_requested; }; -/* KGD2KFD callbacks */ -void kgd2kfd_exit(void); -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, - struct pci_dev *pdev, const struct kfd2kgd_calls *f2g); -bool kgd2kfd_device_init(struct kfd_dev *kfd, - const struct kgd2kfd_shared_resources *gpu_resources); -void kgd2kfd_device_exit(struct kfd_dev *kfd); - enum kfd_mempool { KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, @@ -541,11 +545,6 @@ struct qcm_process_device { /* Approx. time before evicting the process again */ #define PROCESS_ACTIVE_TIME_MS 10 -int kgd2kfd_quiesce_mm(struct mm_struct *mm); -int kgd2kfd_resume_mm(struct mm_struct *mm); -int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, - struct dma_fence *fence); - /* 8 byte handle containing GPU ID in the most significant 4 bytes and * idr_handle in the least significant 4 bytes */ @@ -800,20 +799,11 @@ int kfd_numa_node_to_apic_id(int numa_node_id); /* Interrupts */ int kfd_interrupt_init(struct kfd_dev *dev); void kfd_interrupt_exit(struct kfd_dev *dev); -void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry, uint32_t *patched_ihre, bool *flag); -/* Power Management */ -void kgd2kfd_suspend(struct kfd_dev *kfd); -int kgd2kfd_resume(struct kfd_dev *kfd); - -/* GPU reset */ -int kgd2kfd_pre_reset(struct kfd_dev *kfd); -int kgd2kfd_post_reset(struct 
kfd_dev *kfd); - /* amdkfd Apertures */ int kfd_init_apertures(struct kfd_process *process); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 80b36e860a0a..4bdae78bab8e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -607,13 +607,17 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, if (!qpd->doorbell_bitmap) return -ENOMEM; - /* Mask out any reserved doorbells */ - for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++) - if ((dev->shared_resources.reserved_doorbell_mask & i) == - dev->shared_resources.reserved_doorbell_val) { + /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */ + for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) { + if (i >= dev->shared_resources.non_cp_doorbells_start + && i <= dev->shared_resources.non_cp_doorbells_end) { set_bit(i, qpd->doorbell_bitmap); - pr_debug("reserved doorbell 0x%03x\n", i); + set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, + qpd->doorbell_bitmap); + pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i, + i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET); } + } return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 5f5b2acedbac..09da91644f9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1093,8 +1093,6 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) * the GPU device is not already present in the topology device * list then return NULL. This means a new topology device has to * be created for this GPU. - * TODO: Rather than assiging @gpu to first topology device withtout - * gpu attached, it will better to have more stringent check. 
*/ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) { @@ -1102,12 +1100,20 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) struct kfd_topology_device *out_dev = NULL; down_write(&topology_lock); - list_for_each_entry(dev, &topology_device_list, list) + list_for_each_entry(dev, &topology_device_list, list) { + /* Discrete GPUs need their own topology device list + * entries. Don't assign them to CPU/APU nodes. + */ + if (!gpu->device_info->needs_iommu_device && + dev->node_props.cpu_cores_count) + continue; + if (!dev->gpu && (dev->node_props.simd_count > 0)) { dev->gpu = gpu; out_dev = dev; break; } + } up_write(&topology_lock); return out_dev; } @@ -1392,7 +1398,6 @@ int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) { - const struct cpuinfo_x86 *cpuinfo; int first_cpu_of_numa_node; if (!cpumask || cpumask == cpu_none_mask) @@ -1400,9 +1405,11 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) first_cpu_of_numa_node = cpumask_first(cpumask); if (first_cpu_of_numa_node >= nr_cpu_ids) return -1; - cpuinfo = &cpu_data(first_cpu_of_numa_node); - - return cpuinfo->apicid; +#ifdef CONFIG_X86_64 + return cpu_data(first_cpu_of_numa_node).apicid; +#else + return first_cpu_of_numa_node; +#endif } /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor |