summary | refs | log | tree | commit | diff | stats
path: root/drivers/misc/habanalabs/goya
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/goya')
-rw-r--r-- drivers/misc/habanalabs/goya/goya.c | 188
-rw-r--r-- drivers/misc/habanalabs/goya/goyaP.h | 21
-rw-r--r-- drivers/misc/habanalabs/goya/goya_coresight.c | 142
-rw-r--r-- drivers/misc/habanalabs/goya/goya_hwmgr.c | 140
4 files changed, 367 insertions, 124 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 271c5c8f53b4..7344e8a222ae 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -9,6 +9,7 @@
#include "include/hw_ip/mmu/mmu_general.h"
#include "include/hw_ip/mmu/mmu_v1_0.h"
#include "include/goya/asic_reg/goya_masks.h"
+#include "include/goya/goya_reg_map.h"
#include <linux/pci.h>
#include <linux/genalloc.h>
@@ -41,8 +42,8 @@
* PQ, CQ and CP are not secured.
* PQ, CB and the data are on the SRAM/DRAM.
*
- * Since QMAN DMA is secured, KMD is parsing the DMA CB:
- * - KMD checks DMA pointer
+ * Since QMAN DMA is secured, the driver is parsing the DMA CB:
+ * - checks DMA pointer
* - WREG, MSG_PROT are not allowed.
* - MSG_LONG/SHORT are allowed.
*
@@ -55,15 +56,15 @@
* QMAN DMA: PQ, CQ and CP are secured.
* MMU is set to bypass on the Secure props register of the QMAN.
* The reasons we don't enable MMU for PQ, CQ and CP are:
- * - PQ entry is in kernel address space and KMD doesn't map it.
+ * - PQ entry is in kernel address space and the driver doesn't map it.
* - CP writes to MSIX register and to kernel address space (completion
* queue).
*
- * DMA is not secured but because CP is secured, KMD still needs to parse the
- * CB, but doesn't need to check the DMA addresses.
+ * DMA is not secured but because CP is secured, the driver still needs to parse
+ * the CB, but doesn't need to check the DMA addresses.
*
- * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
- * doesn't map memory in MMU.
+ * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and
+ * the driver doesn't map memory in MMU.
*
* QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
*
@@ -71,6 +72,9 @@
*
*/
+#define GOYA_UBOOT_FW_FILE "habanalabs/goya/goya-u-boot.bin"
+#define GOYA_LINUX_FW_FILE "habanalabs/goya/goya-fit.itb"
+
#define GOYA_MMU_REGS_NUM 63
#define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
@@ -335,18 +339,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
- prop->hw_queues_props[i].kmd_only = 0;
+ prop->hw_queues_props[i].driver_only = 0;
+ prop->hw_queues_props[i].requires_kernel_cb = 1;
}
for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
- prop->hw_queues_props[i].kmd_only = 1;
+ prop->hw_queues_props[i].driver_only = 1;
+ prop->hw_queues_props[i].requires_kernel_cb = 0;
}
for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
NUMBER_OF_INT_HW_QUEUES; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
- prop->hw_queues_props[i].kmd_only = 0;
+ prop->hw_queues_props[i].driver_only = 0;
+ prop->hw_queues_props[i].requires_kernel_cb = 0;
}
for (; i < HL_MAX_QUEUES; i++)
@@ -376,6 +383,23 @@ void goya_get_fixed_properties(struct hl_device *hdev)
prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
+ prop->dmmu.hop0_shift = HOP0_SHIFT;
+ prop->dmmu.hop1_shift = HOP1_SHIFT;
+ prop->dmmu.hop2_shift = HOP2_SHIFT;
+ prop->dmmu.hop3_shift = HOP3_SHIFT;
+ prop->dmmu.hop4_shift = HOP4_SHIFT;
+ prop->dmmu.hop0_mask = HOP0_MASK;
+ prop->dmmu.hop1_mask = HOP1_MASK;
+ prop->dmmu.hop2_mask = HOP2_MASK;
+ prop->dmmu.hop3_mask = HOP3_MASK;
+ prop->dmmu.hop4_mask = HOP4_MASK;
+ prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
+
+ /* No difference between PMMU and DMMU except of page size */
+ memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
+ prop->dmmu.page_size = PAGE_SIZE_2MB;
+ prop->pmmu.page_size = PAGE_SIZE_4KB;
+
prop->va_space_host_start_address = VA_HOST_SPACE_START;
prop->va_space_host_end_address = VA_HOST_SPACE_END;
prop->va_space_dram_start_address = VA_DDR_SPACE_START;
@@ -392,6 +416,9 @@ void goya_get_fixed_properties(struct hl_device *hdev)
prop->tpc_enabled_mask = TPC_ENABLED_MASK;
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
+
+ strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+ CARD_NAME_MAX_LEN);
}
/*
@@ -1006,36 +1033,34 @@ int goya_init_cpu_queues(struct hl_device *hdev)
eq = &hdev->event_queue;
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0,
- lower_32_bits(cpu_pq->bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1,
- upper_32_bits(cpu_pq->bus_address));
+ WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
+ WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2, lower_32_bits(eq->bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address));
+ WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
+ WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8,
+ WREG32(mmCPU_CQ_BASE_ADDR_LOW,
lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9,
+ WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, HL_CPU_ACCESSIBLE_MEM_SIZE);
+ WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
+ WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
+ WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
/* Used for EQ CI */
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0);
+ WREG32(mmCPU_EQ_CI, 0);
WREG32(mmCPU_IF_PF_PQ_PI, 0);
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, PQ_INIT_STATUS_READY_FOR_CP);
+ WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_PI_UPDATE);
err = hl_poll_timeout(
hdev,
- mmPSOC_GLOBAL_CONF_SCRATCHPAD_7,
+ mmCPU_PQ_INIT_STATUS,
status,
(status == PQ_INIT_STATUS_READY_FOR_HOST),
1000,
@@ -1455,6 +1480,9 @@ static void goya_init_golden_registers(struct hl_device *hdev)
1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
+
+ WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
+ ICACHE_FETCH_LINE_NUM, 2);
}
WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
@@ -1534,7 +1562,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev)
u32 mtr_base_lo, mtr_base_hi;
u32 so_base_lo, so_base_hi;
u32 gic_base_lo, gic_base_hi;
- u64 qman_base_addr;
mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
@@ -1546,9 +1573,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev)
gic_base_hi =
upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
- qman_base_addr = hdev->asic_prop.sram_base_address +
- MME_QMAN_BASE_OFFSET;
-
WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
@@ -2063,6 +2087,25 @@ static void goya_disable_msix(struct hl_device *hdev)
goya->hw_cap_initialized &= ~HW_CAP_MSIX;
}
+/*
+ * goya_enable_timestamp - restart the PSOC timestamp counter from zero
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * The counter is stopped first, the two registers holding its 64-bit value
+ * are cleared, and only then is it enabled again.
+ */
+static void goya_enable_timestamp(struct hl_device *hdev)
+{
+	u64 ts_base = mmPSOC_TIMESTAMP_BASE - CFG_BASE;
+
+	/* Stop counting while the value is being reset */
+	WREG32(ts_base, 0);
+
+	/* Clear both parts of the 64-bit counter value */
+	WREG32(ts_base + 0xC, 0);
+	WREG32(ts_base + 0x8, 0);
+
+	/* Start counting */
+	WREG32(ts_base, 1);
+}
+
+/*
+ * goya_disable_timestamp - stop the PSOC timestamp counter
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Only the control register at the timestamp base is written; the counter
+ * value registers are left as they are.
+ */
+static void goya_disable_timestamp(struct hl_device *hdev)
+{
+	u64 ts_base = mmPSOC_TIMESTAMP_BASE - CFG_BASE;
+
+	WREG32(ts_base, 0);
+}
+
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
{
u32 wait_timeout_ms, cpu_timeout_ms;
@@ -2103,6 +2146,8 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
goya_disable_external_queues(hdev);
goya_disable_internal_queues(hdev);
+ goya_disable_timestamp(hdev);
+
if (hard_reset) {
goya_disable_msix(hdev);
goya_mmu_remove_device_cpu_mappings(hdev);
@@ -2121,13 +2166,11 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
*/
static int goya_push_uboot_to_device(struct hl_device *hdev)
{
- char fw_name[200];
void __iomem *dst;
- snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
- return hl_fw_push_fw_to_device(hdev, fw_name, dst);
+ return hl_fw_push_fw_to_device(hdev, GOYA_UBOOT_FW_FILE, dst);
}
/*
@@ -2140,18 +2183,16 @@ static int goya_push_uboot_to_device(struct hl_device *hdev)
*/
static int goya_push_linux_to_device(struct hl_device *hdev)
{
- char fw_name[200];
void __iomem *dst;
- snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
- return hl_fw_push_fw_to_device(hdev, fw_name, dst);
+ return hl_fw_push_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
}
static int goya_pldm_init_cpu(struct hl_device *hdev)
{
- u32 val, unit_rst_val;
+ u32 unit_rst_val;
int rc;
/* Must initialize SRAM scrambler before pushing u-boot to SRAM */
@@ -2159,14 +2200,14 @@ static int goya_pldm_init_cpu(struct hl_device *hdev)
/* Put ARM cores into reset */
WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT);
- val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
+ RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
/* Reset the CA53 MACRO */
unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET);
- val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
+ RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
- val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
+ RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
rc = goya_push_uboot_to_device(hdev);
if (rc)
@@ -2187,7 +2228,7 @@ static int goya_pldm_init_cpu(struct hl_device *hdev)
/* Release ARM core 0 from reset */
WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL,
CPU_RESET_CORE0_DEASSERT);
- val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
+ RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
return 0;
}
@@ -2205,12 +2246,12 @@ static void goya_read_device_fw_version(struct hl_device *hdev,
switch (fwc) {
case FW_COMP_UBOOT:
- ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29);
+ ver_off = RREG32(mmUBOOT_VER_OFFSET);
dest = hdev->asic_prop.uboot_ver;
name = "U-Boot";
break;
case FW_COMP_PREBOOT:
- ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28);
+ ver_off = RREG32(mmPREBOOT_VER_OFFSET);
dest = hdev->asic_prop.preboot_ver;
name = "Preboot";
break;
@@ -2271,6 +2312,10 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
10000,
cpu_timeout);
+ /* Read U-Boot version now in case we will later fail */
+ goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
+ goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
+
if (rc) {
dev_err(hdev->dev, "Error in ARM u-boot!");
switch (status) {
@@ -2308,6 +2353,11 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
"ARM status %d - u-boot stopped by user\n",
status);
break;
+ case CPU_BOOT_STATUS_TS_INIT_FAIL:
+ dev_err(hdev->dev,
+ "ARM status %d - Thermal Sensor initialization failed\n",
+ status);
+ break;
default:
dev_err(hdev->dev,
"ARM status %d - Invalid status code\n",
@@ -2317,10 +2367,6 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
return -EIO;
}
- /* Read U-Boot version now in case we will later fail */
- goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
- goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
-
if (!hdev->fw_loading) {
dev_info(hdev->dev, "Skip loading FW\n");
goto out;
@@ -2433,7 +2479,8 @@ int goya_mmu_init(struct hl_device *hdev)
WREG32_AND(mmSTLB_STLB_FEATURE_EN,
(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
- hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
+ hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
+ VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
WREG32(mmMMU_MMU_ENABLE, 1);
WREG32(mmMMU_SPI_MASK, 0xF);
@@ -2455,13 +2502,12 @@ err:
static int goya_hw_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u32 val;
int rc;
dev_info(hdev->dev, "Starting initialization of H/W\n");
/* Perform read from the device to make sure device is up */
- val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+ RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
/*
* Let's mark in the H/W that we have reached this point. We check
@@ -2469,7 +2515,7 @@ static int goya_hw_init(struct hl_device *hdev)
* we need to reset the chip before doing H/W init. This register is
* cleared by the H/W upon H/W reset
*/
- WREG32(mmPSOC_GLOBAL_CONF_APP_STATUS, HL_DEVICE_HW_STATE_DIRTY);
+ WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
rc = goya_init_cpu(hdev, GOYA_CPU_TIMEOUT_USEC);
if (rc) {
@@ -2505,13 +2551,15 @@ static int goya_hw_init(struct hl_device *hdev)
goya_init_tpc_qmans(hdev);
+ goya_enable_timestamp(hdev);
+
/* MSI-X must be enabled before CPU queues are initialized */
rc = goya_enable_msix(hdev);
if (rc)
goto disable_queues;
/* Perform read from the device to flush all MSI-X configuration */
- val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+ RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
return 0;
@@ -2831,7 +2879,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
dev_err_ratelimited(hdev->dev,
- "Can't send KMD job on QMAN0 because the device is not idle\n");
+ "Can't send driver job on QMAN0 because the device is not idle\n");
return -EBUSY;
}
@@ -2956,9 +3004,6 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
rc = -EIO;
- } else {
- dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n",
- hw_queue_id);
}
free_pkt:
@@ -3903,7 +3948,7 @@ static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
return 0;
dev_err(hdev->dev,
- "Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n",
+ "Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n",
parser->user_cb, parser->user_cb_size);
return -EFAULT;
@@ -3913,7 +3958,7 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
struct goya_device *goya = hdev->asic_specific;
- if (!parser->ext_queue)
+ if (parser->queue_type == QUEUE_TYPE_INT)
return goya_parse_cb_no_ext_queue(hdev, parser);
if (goya->hw_cap_initialized & HW_CAP_MMU)
@@ -3949,7 +3994,7 @@ void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
+ WREG32(mmCPU_EQ_CI, val);
}
void goya_restore_phase_topology(struct hl_device *hdev)
@@ -4447,6 +4492,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
struct goya_device *goya = hdev->asic_specific;
goya->events_stat[event_type]++;
+ goya->events_stat_aggregate[event_type]++;
switch (event_type) {
case GOYA_ASYNC_EVENT_ID_PCIE_IF:
@@ -4528,12 +4574,16 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
}
}
-void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
+void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
struct goya_device *goya = hdev->asic_specific;
- *size = (u32) sizeof(goya->events_stat);
+ if (aggregate) {
+ *size = (u32) sizeof(goya->events_stat_aggregate);
+ return goya->events_stat_aggregate;
+ }
+ *size = (u32) sizeof(goya->events_stat);
return goya->events_stat;
}
@@ -4579,7 +4629,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
lin_dma_pkt++;
} while (--lin_dma_pkts_cnt);
- job = hl_cs_allocate_job(hdev, true);
+ job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
if (!job) {
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
@@ -4808,13 +4858,15 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
}
-static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
+static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
+ u32 flags)
{
struct goya_device *goya = hdev->asic_specific;
u32 status, timeout_usec;
int rc;
- if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
+ hdev->hard_reset_pending)
return;
/* no need in L1 only invalidation in Goya */
@@ -4853,7 +4905,8 @@ static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
u32 status, timeout_usec, inv_data, pi;
int rc;
- if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
+ hdev->hard_reset_pending)
return;
/* no need in L1 only invalidation in Goya */
@@ -4934,6 +4987,10 @@ int goya_armcp_info_get(struct hl_device *hdev)
prop->dram_end_address = prop->dram_base_address + dram_size;
}
+ if (!strlen(prop->armcp_info.card_name))
+ strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+ CARD_NAME_MAX_LEN);
+
return 0;
}
@@ -5047,7 +5104,7 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
- return RREG32(mmPSOC_GLOBAL_CONF_APP_STATUS);
+ return RREG32(mmHW_STATE);
}
static const struct hl_asic_funcs goya_funcs = {
@@ -5106,7 +5163,8 @@ static const struct hl_asic_funcs goya_funcs = {
.init_iatu = goya_init_iatu,
.rreg = hl_rreg,
.wreg = hl_wreg,
- .halt_coresight = goya_halt_coresight
+ .halt_coresight = goya_halt_coresight,
+ .get_clk_rate = goya_get_clk_rate
};
/*
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index d7f48c9c41cd..c3230cb6e25c 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -55,6 +55,8 @@
#define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */
+#define GOYA_DEFAULT_CARD_NAME "HL1000"
+
/* DRAM Memory Map */
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
@@ -68,19 +70,19 @@
MMU_PAGE_TABLES_SIZE)
#define MMU_CACHE_MNG_ADDR (MMU_DRAM_DEFAULT_PAGE_ADDR + \
MMU_DRAM_DEFAULT_PAGE_SIZE)
-#define DRAM_KMD_END_ADDR (MMU_CACHE_MNG_ADDR + \
+#define DRAM_DRIVER_END_ADDR (MMU_CACHE_MNG_ADDR + \
MMU_CACHE_MNG_SIZE)
#define DRAM_BASE_ADDR_USER 0x20000000
-#if (DRAM_KMD_END_ADDR > DRAM_BASE_ADDR_USER)
-#error "KMD must reserve no more than 512MB"
+#if (DRAM_DRIVER_END_ADDR > DRAM_BASE_ADDR_USER)
+#error "Driver must reserve no more than 512MB"
#endif
/*
- * SRAM Memory Map for KMD
+ * SRAM Memory Map for Driver
*
- * KMD occupies KMD_SRAM_SIZE bytes from the start of SRAM. It is used for
+ * Driver occupies DRIVER_SRAM_SIZE bytes from the start of SRAM. It is used for
* MME/TPC QMANs
*
*/
@@ -106,10 +108,10 @@
#define TPC7_QMAN_BASE_OFFSET (TPC6_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
-#define SRAM_KMD_RES_OFFSET (TPC7_QMAN_BASE_OFFSET + \
+#define SRAM_DRIVER_RES_OFFSET (TPC7_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
-#if (SRAM_KMD_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START)
+#if (SRAM_DRIVER_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START)
#error "MME/TPC QMANs SRAM space exceeds limit"
#endif
@@ -162,6 +164,7 @@ struct goya_device {
u64 ddr_bar_cur_addr;
u32 events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
+ u32 events_stat_aggregate[GOYA_ASYNC_EVENT_ID_SIZE];
u32 hw_cap_initialized;
u8 device_cpu_mmu_mappings_done;
};
@@ -215,7 +218,7 @@ int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
-void *goya_get_events_stat(struct hl_device *hdev, u32 *size);
+void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);
void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);
@@ -230,4 +233,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
+int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+
#endif /* GOYAP_H_ */
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index d7ec7ad84cc6..c1ee6e2b5dff 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -8,6 +8,7 @@
#include "goyaP.h"
#include "include/goya/goya_coresight.h"
#include "include/goya/asic_reg/goya_regs.h"
+#include "include/goya/asic_reg/goya_masks.h"
#include <uapi/misc/habanalabs.h>
@@ -15,6 +16,10 @@
#define GOYA_PLDM_CORESIGHT_TIMEOUT_USEC (CORESIGHT_TIMEOUT_USEC * 100)
+#define SPMU_SECTION_SIZE DMA_CH_0_CS_SPMU_MAX_OFFSET
+#define SPMU_EVENT_TYPES_OFFSET 0x400
+#define SPMU_MAX_COUNTERS 6
+
static u64 debug_stm_regs[GOYA_STM_LAST + 1] = {
[GOYA_STM_CPU] = mmCPU_STM_BASE,
[GOYA_STM_DMA_CH_0_CS] = mmDMA_CH_0_CS_STM_BASE,
@@ -226,9 +231,16 @@ static int goya_config_stm(struct hl_device *hdev,
struct hl_debug_params *params)
{
struct hl_debug_params_stm *input;
- u64 base_reg = debug_stm_regs[params->reg_idx] - CFG_BASE;
+ u64 base_reg;
int rc;
+ if (params->reg_idx >= ARRAY_SIZE(debug_stm_regs)) {
+ dev_err(hdev->dev, "Invalid register index in STM\n");
+ return -EINVAL;
+ }
+
+ base_reg = debug_stm_regs[params->reg_idx] - CFG_BASE;
+
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
if (params->enable) {
@@ -288,10 +300,17 @@ static int goya_config_etf(struct hl_device *hdev,
struct hl_debug_params *params)
{
struct hl_debug_params_etf *input;
- u64 base_reg = debug_etf_regs[params->reg_idx] - CFG_BASE;
+ u64 base_reg;
u32 val;
int rc;
+ if (params->reg_idx >= ARRAY_SIZE(debug_etf_regs)) {
+ dev_err(hdev->dev, "Invalid register index in ETF\n");
+ return -EINVAL;
+ }
+
+ base_reg = debug_etf_regs[params->reg_idx] - CFG_BASE;
+
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
val = RREG32(base_reg + 0x304);
@@ -359,33 +378,32 @@ static int goya_config_etr(struct hl_device *hdev,
struct hl_debug_params *params)
{
struct hl_debug_params_etr *input;
- u64 base_reg = mmPSOC_ETR_BASE - CFG_BASE;
u32 val;
int rc;
- WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
+ WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
- val = RREG32(base_reg + 0x304);
+ val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000;
- WREG32(base_reg + 0x304, val);
+ WREG32(mmPSOC_ETR_FFCR, val);
val |= 0x40;
- WREG32(base_reg + 0x304, val);
+ WREG32(mmPSOC_ETR_FFCR, val);
- rc = goya_coresight_timeout(hdev, base_reg + 0x304, 6, false);
+ rc = goya_coresight_timeout(hdev, mmPSOC_ETR_FFCR, 6, false);
if (rc) {
dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n",
params->enable ? "enable" : "disable", rc);
return rc;
}
- rc = goya_coresight_timeout(hdev, base_reg + 0xC, 2, true);
+ rc = goya_coresight_timeout(hdev, mmPSOC_ETR_STS, 2, true);
if (rc) {
dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n",
params->enable ? "enable" : "disable", rc);
return rc;
}
- WREG32(base_reg + 0x20, 0);
+ WREG32(mmPSOC_ETR_CTL, 0);
if (params->enable) {
input = params->input;
@@ -405,25 +423,26 @@ static int goya_config_etr(struct hl_device *hdev,
return -EINVAL;
}
- WREG32(base_reg + 0x34, 0x3FFC);
- WREG32(base_reg + 0x4, input->buffer_size);
- WREG32(base_reg + 0x28, input->sink_mode);
- WREG32(base_reg + 0x110, 0x700);
- WREG32(base_reg + 0x118,
+ WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
+ WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
+ WREG32(mmPSOC_ETR_MODE, input->sink_mode);
+ WREG32(mmPSOC_ETR_AXICTL,
+ 0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
+ WREG32(mmPSOC_ETR_DBALO,
lower_32_bits(input->buffer_address));
- WREG32(base_reg + 0x11C,
+ WREG32(mmPSOC_ETR_DBAHI,
upper_32_bits(input->buffer_address));
- WREG32(base_reg + 0x304, 3);
- WREG32(base_reg + 0x308, 0xA);
- WREG32(base_reg + 0x20, 1);
+ WREG32(mmPSOC_ETR_FFCR, 3);
+ WREG32(mmPSOC_ETR_PSCR, 0xA);
+ WREG32(mmPSOC_ETR_CTL, 1);
} else {
- WREG32(base_reg + 0x34, 0);
- WREG32(base_reg + 0x4, 0x400);
- WREG32(base_reg + 0x118, 0);
- WREG32(base_reg + 0x11C, 0);
- WREG32(base_reg + 0x308, 0);
- WREG32(base_reg + 0x28, 0);
- WREG32(base_reg + 0x304, 0);
+ WREG32(mmPSOC_ETR_BUFWM, 0);
+ WREG32(mmPSOC_ETR_RSZ, 0x400);
+ WREG32(mmPSOC_ETR_DBALO, 0);
+ WREG32(mmPSOC_ETR_DBAHI, 0);
+ WREG32(mmPSOC_ETR_PSCR, 0);
+ WREG32(mmPSOC_ETR_MODE, 0);
+ WREG32(mmPSOC_ETR_FFCR, 0);
if (params->output_size >= sizeof(u64)) {
u32 rwp, rwphi;
@@ -433,8 +452,8 @@ static int goya_config_etr(struct hl_device *hdev,
* the buffer is set in the RWP register (lower 32
* bits), and in the RWPHI register (upper 8 bits).
*/
- rwp = RREG32(base_reg + 0x18);
- rwphi = RREG32(base_reg + 0x3c) & 0xff;
+ rwp = RREG32(mmPSOC_ETR_RWP);
+ rwphi = RREG32(mmPSOC_ETR_RWPHI) & 0xff;
*(u64 *) params->output = ((u64) rwphi << 32) | rwp;
}
}
@@ -445,11 +464,18 @@ static int goya_config_etr(struct hl_device *hdev,
static int goya_config_funnel(struct hl_device *hdev,
struct hl_debug_params *params)
{
- WREG32(debug_funnel_regs[params->reg_idx] - CFG_BASE + 0xFB0,
- CORESIGHT_UNLOCK);
+ u64 base_reg;
+
+ if (params->reg_idx >= ARRAY_SIZE(debug_funnel_regs)) {
+ dev_err(hdev->dev, "Invalid register index in FUNNEL\n");
+ return -EINVAL;
+ }
+
+ base_reg = debug_funnel_regs[params->reg_idx] - CFG_BASE;
- WREG32(debug_funnel_regs[params->reg_idx] - CFG_BASE,
- params->enable ? 0x33F : 0);
+ WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
+
+ WREG32(base_reg, params->enable ? 0x33F : 0);
return 0;
}
@@ -458,9 +484,16 @@ static int goya_config_bmon(struct hl_device *hdev,
struct hl_debug_params *params)
{
struct hl_debug_params_bmon *input;
- u64 base_reg = debug_bmon_regs[params->reg_idx] - CFG_BASE;
+ u64 base_reg;
u32 pcie_base = 0;
+ if (params->reg_idx >= ARRAY_SIZE(debug_bmon_regs)) {
+ dev_err(hdev->dev, "Invalid register index in BMON\n");
+ return -EINVAL;
+ }
+
+ base_reg = debug_bmon_regs[params->reg_idx] - CFG_BASE;
+
WREG32(base_reg + 0x104, 1);
if (params->enable) {
@@ -522,7 +555,7 @@ static int goya_config_bmon(struct hl_device *hdev,
static int goya_config_spmu(struct hl_device *hdev,
struct hl_debug_params *params)
{
- u64 base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE;
+ u64 base_reg;
struct hl_debug_params_spmu *input = params->input;
u64 *output;
u32 output_arr_len;
@@ -531,6 +564,13 @@ static int goya_config_spmu(struct hl_device *hdev,
u32 cycle_cnt_idx;
int i;
+ if (params->reg_idx >= ARRAY_SIZE(debug_spmu_regs)) {
+ dev_err(hdev->dev, "Invalid register index in SPMU\n");
+ return -EINVAL;
+ }
+
+ base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE;
+
if (params->enable) {
input = params->input;
@@ -539,7 +579,13 @@ static int goya_config_spmu(struct hl_device *hdev,
if (input->event_types_num < 3) {
dev_err(hdev->dev,
- "not enough values for SPMU enable\n");
+ "not enough event types values for SPMU enable\n");
+ return -EINVAL;
+ }
+
+ if (input->event_types_num > SPMU_MAX_COUNTERS) {
+ dev_err(hdev->dev,
+ "too many event types values for SPMU enable\n");
return -EINVAL;
}
@@ -547,7 +593,8 @@ static int goya_config_spmu(struct hl_device *hdev,
WREG32(base_reg + 0xE04, 0x41013040);
for (i = 0 ; i < input->event_types_num ; i++)
- WREG32(base_reg + 0x400 + i * 4, input->event_types[i]);
+ WREG32(base_reg + SPMU_EVENT_TYPES_OFFSET + i * 4,
+ input->event_types[i]);
WREG32(base_reg + 0xE04, 0x41013041);
WREG32(base_reg + 0xC00, 0x8000003F);
@@ -567,6 +614,12 @@ static int goya_config_spmu(struct hl_device *hdev,
return -EINVAL;
}
+ if (events_num > SPMU_MAX_COUNTERS) {
+ dev_err(hdev->dev,
+ "too many events values for SPMU disable\n");
+ return -EINVAL;
+ }
+
WREG32(base_reg + 0xE04, 0x41013040);
for (i = 0 ; i < events_num ; i++)
@@ -584,24 +637,11 @@ static int goya_config_spmu(struct hl_device *hdev,
return 0;
}
-static int goya_config_timestamp(struct hl_device *hdev,
- struct hl_debug_params *params)
-{
- WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
- if (params->enable) {
- WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
- WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
- WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
- }
-
- return 0;
-}
-
int goya_debug_coresight(struct hl_device *hdev, void *data)
{
struct hl_debug_params *params = data;
u32 val;
- int rc;
+ int rc = 0;
switch (params->op) {
case HL_DEBUG_OP_STM:
@@ -623,7 +663,7 @@ int goya_debug_coresight(struct hl_device *hdev, void *data)
rc = goya_config_spmu(hdev, params);
break;
case HL_DEBUG_OP_TIMESTAMP:
- rc = goya_config_timestamp(hdev, params);
+ /* Do nothing as this opcode is deprecated */
break;
default:
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index 088692c852b6..b2ebc01e27f4 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -32,6 +32,37 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
}
}
+/*
+ * goya_get_clk_rate - report the current and maximum MME PLL clock rates
+ *
+ * @hdev: pointer to hl_device structure
+ * @cur_clk: filled with the current rate divided by 1,000,000
+ *           (presumably Hz converted to MHz - confirm against
+ *           hl_get_frequency())
+ * @max_clk: filled with the maximum rate, scaled the same way
+ *
+ * Return: 0 on success, -ENODEV if the device is disabled or in reset,
+ * otherwise the negative value returned by hl_get_frequency(). Note that
+ * *max_clk has already been written when the current-rate query fails.
+ */
+int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
+{
+	long freq;
+
+	if (hl_device_disabled_or_in_reset(hdev))
+		return -ENODEV;
+
+	/* First query: the maximum rate */
+	freq = hl_get_frequency(hdev, MME_PLL, false);
+	if (freq < 0) {
+		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
+			freq);
+		return freq;
+	}
+
+	*max_clk = (freq / 1000 / 1000);
+
+	/* Second query: the current rate */
+	freq = hl_get_frequency(hdev, MME_PLL, true);
+	if (freq < 0) {
+		dev_err(hdev->dev,
+			"Failed to retrieve device current clock %ld\n",
+			freq);
+		return freq;
+	}
+
+	*cur_clk = (freq / 1000 / 1000);
+
+	return 0;
+}
+
static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -230,18 +261,127 @@ static ssize_t ic_clk_curr_show(struct device *dev,
return sprintf(buf, "%lu\n", value);
}
+/*
+ * pm_mng_profile_show - sysfs read handler for the pm_mng_profile attribute
+ *
+ * Prints "auto", "manual" or "unknown" according to hdev->pm_mng_profile.
+ *
+ * Return: number of characters written to @buf, or -ENODEV if the device is
+ * disabled or in reset.
+ */
+static ssize_t pm_mng_profile_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	const char *profile_name;
+
+	if (hl_device_disabled_or_in_reset(hdev))
+		return -ENODEV;
+
+	if (hdev->pm_mng_profile == PM_AUTO)
+		profile_name = "auto";
+	else if (hdev->pm_mng_profile == PM_MANUAL)
+		profile_name = "manual";
+	else
+		profile_name = "unknown";
+
+	return sprintf(buf, "%s\n", profile_name);
+}
+
+/*
+ * pm_mng_profile_store - sysfs write handler for the pm_mng_profile attribute
+ *
+ * Accepts input starting with "auto" or "manual" and switches
+ * hdev->pm_mng_profile accordingly. The switch is refused while a compute
+ * context is open on the device.
+ *
+ * Return: @count on success, -ENODEV if the device is disabled or in reset,
+ * -EPERM if a compute context is open, -EINVAL for any other input.
+ */
+static ssize_t pm_mng_profile_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	bool flush_freq_work = false;
+	ssize_t ret = count;
+
+	if (hl_device_disabled_or_in_reset(hdev))
+		return -ENODEV;
+
+	mutex_lock(&hdev->fpriv_list_lock);
+
+	if (hdev->compute_ctx) {
+		dev_err(hdev->dev,
+			"Can't change PM profile while compute context is opened on the device\n");
+		ret = -EPERM;
+	} else if (!strncmp("auto", buf, strlen("auto"))) {
+		/* Make sure we are in LOW PLL when changing modes */
+		if (hdev->pm_mng_profile == PM_MANUAL) {
+			hdev->curr_pll_profile = PLL_HIGH;
+			hl_device_set_frequency(hdev, PLL_LOW);
+			hdev->pm_mng_profile = PM_AUTO;
+		}
+	} else if (!strncmp("manual", buf, strlen("manual"))) {
+		if (hdev->pm_mng_profile == PM_AUTO) {
+			/* Set the mode to manual while still holding the
+			 * lock, so nothing will change if a user suddenly
+			 * opens the device. The flush itself must wait until
+			 * the lock is released because the work thread also
+			 * takes this lock
+			 */
+			hdev->pm_mng_profile = PM_MANUAL;
+			flush_freq_work = true;
+		}
+	} else {
+		dev_err(hdev->dev, "value should be auto or manual\n");
+		ret = -EINVAL;
+	}
+
+	mutex_unlock(&hdev->fpriv_list_lock);
+
+	/* Flush the current work so we can return to the user knowing that
+	 * he is the only one changing frequencies
+	 */
+	if (flush_freq_work)
+		flush_delayed_work(&hdev->work_freq);
+
+	return ret;
+}
+
+/*
+ * high_pll_show - sysfs read handler for the high_pll attribute
+ *
+ * Return: number of characters written to @buf (the value of hdev->high_pll
+ * in decimal followed by a newline), or -ENODEV if the device is disabled or
+ * in reset.
+ */
+static ssize_t high_pll_show(struct device *dev, struct device_attribute *attr,
+				char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	ssize_t len;
+
+	if (hl_device_disabled_or_in_reset(hdev))
+		len = -ENODEV;
+	else
+		len = sprintf(buf, "%u\n", hdev->high_pll);
+
+	return len;
+}
+
+/*
+ * high_pll_store - sysfs write handler for the high_pll attribute
+ *
+ * Parses @buf as an unsigned integer (base auto-detected by kstrtoul) and
+ * stores it in hdev->high_pll.
+ *
+ * Return: @count on success, -ENODEV if the device is disabled or in reset,
+ * -EINVAL if @buf cannot be parsed.
+ */
+static ssize_t high_pll_store(struct device *dev, struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+	/* kstrtoul() writes through an unsigned long pointer */
+	unsigned long value;
+	int rc;
+
+	if (hl_device_disabled_or_in_reset(hdev)) {
+		count = -ENODEV;
+		goto out;
+	}
+
+	rc = kstrtoul(buf, 0, &value);
+
+	if (rc) {
+		count = -EINVAL;
+		goto out;
+	}
+
+	hdev->high_pll = value;
+
+out:
+	return count;
+}
+
+static DEVICE_ATTR_RW(high_pll);
static DEVICE_ATTR_RW(ic_clk);
static DEVICE_ATTR_RO(ic_clk_curr);
static DEVICE_ATTR_RW(mme_clk);
static DEVICE_ATTR_RO(mme_clk_curr);
+static DEVICE_ATTR_RW(pm_mng_profile);
static DEVICE_ATTR_RW(tpc_clk);
static DEVICE_ATTR_RO(tpc_clk_curr);
static struct attribute *goya_dev_attrs[] = {
+ &dev_attr_high_pll.attr,
&dev_attr_ic_clk.attr,
&dev_attr_ic_clk_curr.attr,
&dev_attr_mme_clk.attr,
&dev_attr_mme_clk_curr.attr,
+ &dev_attr_pm_mng_profile.attr,
&dev_attr_tpc_clk.attr,
&dev_attr_tpc_clk_curr.attr,
NULL,
OpenPOWER on IntegriCloud