summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/misc/habanalabs/debugfs.c7
-rw-r--r--drivers/misc/habanalabs/goya/goya.c126
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h12
-rw-r--r--drivers/misc/habanalabs/habanalabs.h6
4 files changed, 137 insertions, 14 deletions
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index ba418aaa404c..886f8ea82499 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -355,7 +355,7 @@ static int mmu_show(struct seq_file *s, void *data)
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
- struct hl_ctx *ctx = hdev->user_ctx;
+ struct hl_ctx *ctx;
u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
@@ -367,6 +367,11 @@ static int mmu_show(struct seq_file *s, void *data)
if (!hdev->mmu_enable)
return 0;
+ if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
+ ctx = hdev->kernel_ctx;
+ else
+ ctx = hdev->user_ctx;
+
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
return 0;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 106074466dca..4e41f2669e6d 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -297,6 +297,11 @@ static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
+static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
+static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
+
void goya_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -554,6 +559,10 @@ int goya_late_init(struct hl_device *hdev)
return rc;
}
+ rc = goya_mmu_add_mappings_for_device_cpu(hdev);
+ if (rc)
+ return rc;
+
rc = goya_init_cpu_queues(hdev);
if (rc)
return rc;
@@ -2065,10 +2074,12 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
goya_disable_external_queues(hdev);
goya_disable_internal_queues(hdev);
- if (hard_reset)
+ if (hard_reset) {
goya_disable_msix(hdev);
- else
+ goya_mmu_remove_device_cpu_mappings(hdev);
+ } else {
goya_sync_irqs(hdev);
+ }
}
/*
@@ -4584,7 +4595,7 @@ int goya_context_switch(struct hl_device *hdev, u32 asid)
return 0;
}
-int goya_mmu_clear_pgt_range(struct hl_device *hdev)
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct goya_device *goya = hdev->asic_specific;
@@ -4598,7 +4609,7 @@ int goya_mmu_clear_pgt_range(struct hl_device *hdev)
return goya_memset_device_memory(hdev, addr, size, 0, true);
}
-int goya_mmu_set_dram_default_page(struct hl_device *hdev)
+static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
@@ -4611,7 +4622,112 @@ int goya_mmu_set_dram_default_page(struct hl_device *hdev)
return goya_memset_device_memory(hdev, addr, size, val, true);
}
-void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
+static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
+ s64 off, cpu_off;
+ int rc;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return 0;
+
+ for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
+ rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
+ prop->dram_base_address + off, PAGE_SIZE_2MB);
+ if (rc) {
+ dev_err(hdev->dev, "Map failed for address 0x%llx\n",
+ prop->dram_base_address + off);
+ goto unmap;
+ }
+ }
+
+ if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
+ rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
+ hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Map failed for CPU accessible memory\n");
+ off -= PAGE_SIZE_2MB;
+ goto unmap;
+ }
+ } else {
+ for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
+ rc = hl_mmu_map(hdev->kernel_ctx,
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
+ hdev->cpu_accessible_dma_address + cpu_off,
+ PAGE_SIZE_4KB);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Map failed for CPU accessible memory\n");
+ cpu_off -= PAGE_SIZE_4KB;
+ goto unmap_cpu;
+ }
+ }
+ }
+
+ goya->device_cpu_mmu_mappings_done = true;
+
+ return 0;
+
+unmap_cpu:
+ for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
+ PAGE_SIZE_4KB))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap address 0x%llx\n",
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
+unmap:
+ for (; off >= 0 ; off -= PAGE_SIZE_2MB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ prop->dram_base_address + off, PAGE_SIZE_2MB))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap address 0x%llx\n",
+ prop->dram_base_address + off);
+
+ return rc;
+}
+
+void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
+ u32 off, cpu_off;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return;
+
+ if (!goya->device_cpu_mmu_mappings_done)
+ return;
+
+ if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
+ if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
+ PAGE_SIZE_2MB))
+ dev_warn(hdev->dev,
+ "Failed to unmap CPU accessible memory\n");
+ } else {
+ for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
+ PAGE_SIZE_4KB))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap address 0x%llx\n",
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
+ }
+
+ for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ prop->dram_base_address + off, PAGE_SIZE_2MB))
+ dev_warn_ratelimited(hdev->dev,
+ "Failed to unmap address 0x%llx\n",
+ prop->dram_base_address + off);
+
+ goya->device_cpu_mmu_mappings_done = false;
+}
+
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
struct goya_device *goya = hdev->asic_specific;
int i;
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 066b1d306977..f8c611883dc1 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -126,6 +126,12 @@
#define VA_DDR_SPACE_SIZE (VA_DDR_SPACE_END - \
VA_DDR_SPACE_START) /* 128GB */
+#if (HL_CPU_ACCESSIBLE_MEM_SIZE != SZ_2M)
+#error "HL_CPU_ACCESSIBLE_MEM_SIZE must be exactly 2MB to enable MMU mapping"
+#endif
+
+#define VA_CPU_ACCESSIBLE_MEM_ADDR 0x8000000000ull
+
#define DMA_MAX_TRANSFER_SIZE U32_MAX
#define HW_CAP_PLL 0x00000001
@@ -157,6 +163,7 @@ struct goya_device {
u64 ddr_bar_cur_addr;
u32 events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
u32 hw_cap_initialized;
+ u8 device_cpu_mmu_mappings_done;
};
void goya_get_fixed_properties(struct hl_device *hdev);
@@ -204,10 +211,6 @@ int goya_armcp_info_get(struct hl_device *hdev);
int goya_debug_coresight(struct hl_device *hdev, void *data);
void goya_halt_coresight(struct hl_device *hdev);
-void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
-int goya_mmu_clear_pgt_range(struct hl_device *hdev);
-int goya_mmu_set_dram_default_page(struct hl_device *hdev);
-
int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);
@@ -225,5 +228,6 @@ void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle);
void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
+void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
#endif /* GOYAP_H_ */
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 0462b7727da7..5e4a631b3d88 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -320,10 +320,8 @@ struct hl_cs_job;
#define HL_EQ_LENGTH 64
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
-/* KMD <-> ArmCP shared memory size (EQ + PQ + 2MB for packets) */
-#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_EQ_SIZE_IN_BYTES + \
- HL_QUEUE_SIZE_IN_BYTES + \
- SZ_2M)
+/* KMD <-> ArmCP shared memory size */
+#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M
/**
* struct hl_hw_queue - describes a H/W transport queue.
OpenPOWER on IntegriCloud